author    Cathal Corbett <cathal.corbett@arm.com>  2023-01-12 11:17:03 +0000
committer Cathal Corbett <cathal.corbett@arm.com>  2023-01-12 11:18:21 +0000
commit    d69c1c595375b904a7f19f562ac1d54098184b4e (patch)
tree      b2c4980eb367aa160282aae5c2deda8ef19682de /src
parent    267c985a6322fbc1efa22ba44188ac867537f1b1 (diff)
download  armnn-d69c1c595375b904a7f19f562ac1d54098184b4e.tar.gz
Merge 'main' onto 'experimental/GpuFsa'.
* I6c71be11e9b73694747b27fe9febab8d9669b4d4

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Iccaf50e2484559979d801ee9d0e130e848554733
Diffstat (limited to 'src')
-rw-r--r-- src/armnn/AsyncExecutionCallback.cpp | 4
-rw-r--r-- src/armnn/AsyncExecutionCallback.hpp | 3
-rw-r--r-- src/armnn/Graph.cpp | 18
-rw-r--r-- src/armnn/LoadedNetwork.cpp | 2
-rw-r--r-- src/armnn/Network.cpp | 14
-rw-r--r-- src/armnn/Runtime.cpp | 25
-rw-r--r-- src/armnn/Runtime.hpp | 6
-rw-r--r-- src/armnn/TypesUtils.cpp | 6
-rw-r--r-- src/armnn/test/SubgraphViewTests.cpp | 29
-rw-r--r-- src/armnnOnnxParser/OnnxParser.cpp | 51
-rw-r--r-- src/armnnOnnxParser/OnnxParser.hpp | 10
-rwxr-xr-x src/armnnSerializer/CMakeLists.txt | 25
-rwxr-xr-x src/armnnTestUtils/CMakeLists.txt | 6
-rwxr-xr-x src/armnnTfLiteParser/CMakeLists.txt | 8
-rw-r--r-- src/armnnTfLiteParser/TfLiteParser.cpp | 106
-rw-r--r-- src/armnnTfLiteParser/TfLiteParser.hpp | 8
-rw-r--r-- src/armnnTfLiteParser/test/Conv2D.cpp | 2
-rw-r--r-- src/armnnUtils/TensorUtils.cpp | 91
-rw-r--r-- src/armnnUtils/test/TensorUtilsTest.cpp | 173
-rw-r--r-- src/backends/aclCommon/ArmComputeTuningUtils.cpp | 60
-rw-r--r-- src/backends/aclCommon/ArmComputeTuningUtils.hpp | 84
-rw-r--r-- src/backends/aclCommon/CMakeLists.txt | 5
-rw-r--r-- src/backends/aclCommon/IClTensorHandle.hpp (renamed from src/backends/cl/IClTensorHandle.hpp) | 0
-rw-r--r-- src/backends/aclCommon/common.mk | 1
-rw-r--r-- src/backends/backendsCommon/CMakeLists.txt | 4
-rw-r--r-- src/backends/backendsCommon/test/BackendProfilingTests.cpp | 7
-rw-r--r-- src/backends/backendsCommon/test/CMakeLists.txt | 4
-rw-r--r-- src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp | 96
-rw-r--r-- src/backends/backendsCommon/test/OptimizationViewsTests.cpp | 13
-rw-r--r-- src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp | 96
-rw-r--r-- src/backends/cl/CMakeLists.txt | 3
-rw-r--r-- src/backends/cl/ClBackendContext.cpp | 133
-rw-r--r-- src/backends/cl/ClBackendContext.hpp | 5
-rw-r--r-- src/backends/cl/ClContextControl.cpp | 53
-rw-r--r-- src/backends/cl/ClContextControl.hpp | 22
-rw-r--r-- src/backends/cl/ClImportTensorHandle.hpp | 4
-rw-r--r-- src/backends/cl/ClLayerSupport.cpp | 21
-rw-r--r-- src/backends/cl/ClLayerSupport.hpp | 6
-rw-r--r-- src/backends/cl/ClTensorHandle.hpp | 4
-rw-r--r-- src/backends/cl/ClTensorHandleFactory.cpp | 6
-rw-r--r-- src/backends/cl/ClTensorHandleFactory.hpp | 6
-rw-r--r-- src/backends/cl/ClWorkloadFactory.cpp | 5
-rw-r--r-- src/backends/cl/backend.mk | 1
-rw-r--r-- src/backends/cl/test/CMakeLists.txt | 4
-rw-r--r-- src/backends/cl/test/ClDefaultAllocatorTests.cpp (renamed from src/backends/cl/test/DefaultAllocatorTests.cpp) | 2
-rw-r--r-- src/backends/cl/test/ClLayerTests.cpp | 23
-rw-r--r-- src/backends/cl/workloads/CMakeLists.txt | 2
-rw-r--r-- src/backends/cl/workloads/ClBatchMatMulWorkload.cpp | 203
-rw-r--r-- src/backends/cl/workloads/ClBatchMatMulWorkload.hpp | 41
-rw-r--r-- src/backends/cl/workloads/ClWorkloads.hpp | 1
-rw-r--r-- src/backends/dynamic/reference/CMakeLists.txt | 6
-rw-r--r-- src/backends/neon/NeonLayerSupport.cpp | 608
-rw-r--r-- src/backends/tosaCommon/TosaMappings.cpp | 9
-rw-r--r-- src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp | 20
-rw-r--r-- src/backends/tosaCommon/operatorMappings/CMakeLists.txt | 6
-rw-r--r-- src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp | 103
-rw-r--r-- src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp | 20
-rw-r--r-- src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp | 7
-rw-r--r-- src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp (renamed from src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp) | 39
-rw-r--r-- src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp | 20
-rw-r--r-- src/backends/tosaCommon/test/OneToOneMappingTests.cpp | 106
-rw-r--r-- src/backends/tosaCommon/test/TosaTestUtils.hpp | 8
-rw-r--r-- src/backends/tosaReference/TosaRefLayerSupport.cpp | 4
-rw-r--r-- src/backends/tosaReference/test/TosaRefEndToEndTests.cpp | 38
-rw-r--r-- src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp | 130
65 files changed, 1983 insertions, 643 deletions
diff --git a/src/armnn/AsyncExecutionCallback.cpp b/src/armnn/AsyncExecutionCallback.cpp
index 5b87927af2..73ce66b7fb 100644
--- a/src/armnn/AsyncExecutionCallback.cpp
+++ b/src/armnn/AsyncExecutionCallback.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -11,6 +11,8 @@ namespace armnn
namespace experimental
{
+InferenceId AsyncExecutionCallback::nextID = 0u;
+
void AsyncExecutionCallback::Notify(armnn::Status status, InferenceTimingPair timeTaken)
{
{
diff --git a/src/armnn/AsyncExecutionCallback.hpp b/src/armnn/AsyncExecutionCallback.hpp
index 9eab06b4fa..d48f80737d 100644
--- a/src/armnn/AsyncExecutionCallback.hpp
+++ b/src/armnn/AsyncExecutionCallback.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -68,7 +68,6 @@ private:
armnn::Status m_Status = Status::Failure;
InferenceId m_InferenceId;
};
-InferenceId AsyncExecutionCallback::nextID = 0u;
// Manager to create and monitor AsyncExecutionCallbacks
// GetNewCallback will create a callback for use in Threadpool::Schedule
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index b5769f75f3..e5d123830c 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -497,13 +497,19 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const Subgr
IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
ARMNN_ASSERT(subgraphInputSlot);
- IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
- ARMNN_ASSERT(connectedOutputSlot);
- connectedOutputSlot->Disconnect(*subgraphInputSlot);
+ // Only disconnect if the InputSlot has a connection, this might not be the case when
+ // dealing with working copies of SubgraphViews
+ // Note: we don't need this check for OutputSlot as it iterates over a vector of valid connections
+ if (subgraphInputSlot->GetConnection())
+ {
+ IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
+ ARMNN_ASSERT(connectedOutputSlot);
+ connectedOutputSlot->Disconnect(*subgraphInputSlot);
- IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
- ARMNN_ASSERT(substituteInputSlot);
- connectedOutputSlot->Connect(*substituteInputSlot);
+ IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
+ ARMNN_ASSERT(substituteInputSlot);
+ connectedOutputSlot->Connect(*substituteInputSlot);
+ }
}
// Step 2: process output slots
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7b24fd77b8..b42874f29d 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -1328,6 +1328,7 @@ void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle*
}
else
{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyInput");
std::unique_ptr<ITensorHandle> tensorHandle =
std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
@@ -1374,6 +1375,7 @@ void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle
void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
auto copyFunc = [](void* dst, const void* src, size_t size)
{
memcpy(dst, src, size);
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 158142f48e..42388bfbd7 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -626,7 +626,14 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
// need to set the compute device on the layer
// before we can check if it is supported
layer->SetBackendId(backend);
- if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
+
+ // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
+ // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
+ // to be FP32 and inserting convert layers around the FP32 operator.
+ bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
+ std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
+ if (!isLayerSupported ||
+ reasonIfUnsupported.find(checkStr) != std::string::npos)
{
if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
{
@@ -1568,8 +1575,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
const OptimizerOptions& options,
Optional<std::vector<std::string>&> messages)
{
- const auto start_time = armnn::GetTimeNow();
-
ARMNN_LOG(debug) << options.ToString();
// Enable profiling
@@ -1750,9 +1755,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
}
- ARMNN_LOG(info) << "!! New time !! : " << std::setprecision(2)
- << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms.";
-
return optNet;
}
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 75b1ee8179..ff64e856f4 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -20,7 +20,10 @@
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/utility/Timer.hpp>
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
#include <backendsCommon/DynamicBackendUtils.hpp>
+#endif
+
#include <backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp>
#include <client/include/backends/IBackendProfiling.hpp>
@@ -334,11 +337,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
throw RuntimeException(
"It is not possible to enable timeline reporting without profiling being enabled");
}
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
// Load any available/compatible dynamic backend before the runtime
// goes through the backend registry
LoadDynamicBackends(options.m_DynamicBackendsPath);
-
+#endif
armnn::BackendIdSet supportedBackends;
for (const auto& id : BackendRegistryInstance().GetBackendIds())
{
@@ -354,9 +357,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end() &&
customAllocatorMapIterator->second == nullptr)
{
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
// We need to manually clean up the dynamic backends before throwing an exception.
DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
m_DeviceSpec.ClearDynamicBackends();
+#endif
throw armnn::Exception("Allocator associated with id " + id.Get() + " is null");
}
@@ -393,6 +398,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
}
// No errors so register the Custom Allocator with the BackendRegistry
BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+ m_AllocatorsAddedByThisRuntime.emplace(id);
}
else
{
@@ -428,6 +434,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
}
// No errors so register the Custom Allocator with the BackendRegistry
BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+ m_AllocatorsAddedByThisRuntime.emplace(id);
}
}
@@ -577,13 +584,20 @@ RuntimeImpl::~RuntimeImpl()
<< std::endl;
}
}
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
// Clear all dynamic backends.
DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
m_DeviceSpec.ClearDynamicBackends();
+#endif
m_BackendContexts.clear();
BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional());
+ // Remove custom allocators that this runtime has added.
+ // Note: that as backends can be per process and there can be many instances of a runtime in a process an allocator
+ // may have been overwritten by another runtime.
+ for_each(m_AllocatorsAddedByThisRuntime.begin(), m_AllocatorsAddedByThisRuntime.end(),
+ [](BackendId id) {BackendRegistryInstance().DeregisterAllocator(id);});
+
ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2)
<< std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
}
@@ -755,6 +769,7 @@ void RuntimeImpl::RegisterDebugCallback(NetworkId networkId, const DebugCallback
loadedNetwork->RegisterDebugCallback(func);
}
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
{
// Get the paths where to load the dynamic backends from
@@ -772,5 +787,5 @@ void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
// Add the registered dynamic backend ids to the list of supported backends
m_DeviceSpec.AddSupportedBackends(registeredBackendIds, true);
}
-
+#endif
} // namespace armnn
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index f5dfadf948..9d47b7898d 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -157,6 +157,10 @@ private:
/// Profiling Service Instance
std::unique_ptr<arm::pipe::IProfilingService> m_ProfilingService;
+
+ /// Keep track of backend ids of the custom allocators that this instance of the runtime added. The
+ /// destructor can then clean up for this runtime.
+ std::set<BackendId> m_AllocatorsAddedByThisRuntime;
};
} // namespace armnn
diff --git a/src/armnn/TypesUtils.cpp b/src/armnn/TypesUtils.cpp
index 4ba9ed19e1..74ac231bc9 100644
--- a/src/armnn/TypesUtils.cpp
+++ b/src/armnn/TypesUtils.cpp
@@ -81,4 +81,8 @@ float armnn::Dequantize<int16_t>(int16_t value, float scale, int32_t offset);
/// Explicit specialization of Dequantize for int32_t
template
-float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
\ No newline at end of file
+float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
+
+/// Explicit specialization of Dequantize for int64_t
+template
+float armnn::Dequantize<int64_t>(int64_t value, float scale, int32_t offset);
diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp
index 4ce67b0fec..9bb5e69bbb 100644
--- a/src/armnn/test/SubgraphViewTests.cpp
+++ b/src/armnn/test/SubgraphViewTests.cpp
@@ -2063,6 +2063,35 @@ TEST_CASE("SubgraphViewWorkingCopySubstituteSubgraph")
CHECK_THROWS_AS(workingCopy.GetWorkingCopy(), Exception);
}
+TEST_CASE("SubgraphViewPartialWorkingCopySubstituteSubgraph")
+{
+ Graph graph;
+
+ auto input = graph.AddLayer<InputLayer>(0, "Input");
+ auto activation = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation");
+ auto output = graph.AddLayer<OutputLayer>(1, "Output");
+
+ input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+ activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ //Add in out of order
+ auto view = CreateSubgraphViewFrom({activation},
+ {&activation->GetInputSlot(0)},
+ {&activation->GetOutputSlot(0)});
+
+ auto workingCopy = view->GetWorkingCopy();
+
+ // First (and only) layer in the subgraph is the Activation
+ CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation");
+
+ // Substitute the "Activation" layer for an equivalent layer
+ auto activation2 = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation2");
+ SubgraphView pattern(*workingCopy.beginIConnectable());
+ workingCopy.SubstituteSubgraph(pattern, activation2);
+
+ CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation2");
+}
+
TEST_CASE("SubgraphViewWorkingCopyOptimizationViews")
{
Graph graph;
diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp
index 63fb60382c..552d4e4163 100644
--- a/src/armnnOnnxParser/OnnxParser.cpp
+++ b/src/armnnOnnxParser/OnnxParser.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "OnnxParser.hpp"
@@ -50,6 +50,17 @@ armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinaryFile(const char* graphFil
return pOnnxParserImpl->CreateNetworkFromBinaryFile(graphFile);
}
+armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent)
+{
+ return pOnnxParserImpl->CreateNetworkFromBinary(binaryContent);
+}
+
+armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent,
+ const std::map<std::string, armnn::TensorShape>& inputShapes)
+{
+ return pOnnxParserImpl->CreateNetworkFromBinary(binaryContent, inputShapes);
+}
+
armnn::INetworkPtr IOnnxParser::CreateNetworkFromTextFile(const char* graphFile)
{
return pOnnxParserImpl->CreateNetworkFromTextFile(graphFile);
@@ -731,6 +742,44 @@ INetworkPtr OnnxParserImpl::CreateNetworkFromTextFile(const char* graphFile,
return CreateNetworkFromModel(*modelProto);
}
+INetworkPtr OnnxParserImpl::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent)
+{
+ ResetParser();
+ ModelPtr modelProto = LoadModelFromBinary(binaryContent);
+ return CreateNetworkFromModel(*modelProto);
+}
+
+INetworkPtr OnnxParserImpl::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent,
+ const std::map<std::string, armnn::TensorShape>& inputShapes)
+{
+ ResetParser();
+ m_InputShapes = inputShapes;
+ ModelPtr modelProto = LoadModelFromBinary(binaryContent);
+ return CreateNetworkFromModel(*modelProto);
+}
+
+ModelPtr OnnxParserImpl::LoadModelFromBinary(const std::vector<uint8_t>& binaryContent)
+{
+ if (binaryContent.size() == 0)
+ {
+ throw ParseException(fmt::format("Missing binary content", CHECK_LOCATION().AsString()));
+ }
+ // Parse the file into a message
+ ModelPtr modelProto = std::make_unique<onnx::ModelProto>();
+
+ google::protobuf::io::CodedInputStream codedStream(binaryContent.data(), static_cast<int>(binaryContent.size()));
+ codedStream.SetTotalBytesLimit(INT_MAX);
+ bool success = modelProto.get()->ParseFromCodedStream(&codedStream);
+
+ if (!success)
+ {
+ std::stringstream error;
+ error << "Failed to parse graph";
+ throw ParseException(fmt::format("{} {}", error.str(), CHECK_LOCATION().AsString()));
+ }
+ return modelProto;
+}
+
ModelPtr OnnxParserImpl::LoadModelFromBinaryFile(const char* graphFile)
{
FILE* fd = fopen(graphFile, "rb");
diff --git a/src/armnnOnnxParser/OnnxParser.hpp b/src/armnnOnnxParser/OnnxParser.hpp
index bb94472c6d..c9f321a5b5 100644
--- a/src/armnnOnnxParser/OnnxParser.hpp
+++ b/src/armnnOnnxParser/OnnxParser.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -38,6 +38,13 @@ public:
armnn::INetworkPtr CreateNetworkFromBinaryFile(const char* graphFile,
const std::map<std::string, armnn::TensorShape>& inputShapes);
+ /// Create the network from a protobuf binary
+ armnn::INetworkPtr CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent);
+
+ /// Create the network from a protobuf binary, with inputShapes specified
+ armnn::INetworkPtr CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent,
+ const std::map<std::string, armnn::TensorShape>& inputShapes);
+
/// Create the network from a protobuf text file on disk
armnn::INetworkPtr CreateNetworkFromTextFile(const char* graphFile);
@@ -64,6 +71,7 @@ public:
OnnxParserImpl();
~OnnxParserImpl() = default;
+ static ModelPtr LoadModelFromBinary(const std::vector<uint8_t>& binaryContent);
static ModelPtr LoadModelFromBinaryFile(const char * fileName);
static ModelPtr LoadModelFromTextFile(const char * fileName);
static ModelPtr LoadModelFromString(const std::string& inputString);
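
The new CreateNetworkFromBinary overloads let a caller parse an ONNX model that is already held in memory instead of going through a file on disk. A minimal usage sketch, not part of this patch; the file name and the lack of error handling are illustrative only:

#include <armnnOnnxParser/IOnnxParser.hpp>

#include <fstream>
#include <iterator>
#include <vector>

int main()
{
    // Read a serialized ONNX model into a byte buffer (the path is illustrative).
    std::ifstream file("model.onnx", std::ios::binary);
    std::vector<uint8_t> content((std::istreambuf_iterator<char>(file)),
                                  std::istreambuf_iterator<char>());

    // Parse directly from the in-memory buffer rather than from a file path.
    armnnOnnxParser::IOnnxParserPtr parser = armnnOnnxParser::IOnnxParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinary(content);

    return network ? 0 : 1;
}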
diff --git a/src/armnnSerializer/CMakeLists.txt b/src/armnnSerializer/CMakeLists.txt
index 8acdafbc28..01c51e65d7 100755
--- a/src/armnnSerializer/CMakeLists.txt
+++ b/src/armnnSerializer/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017, 2019-2020, 2022 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017, 2019-2020, 2022-2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
if(BUILD_ARMNN_SERIALIZER)
@@ -36,7 +36,7 @@ if(BUILD_ARMNN_SERIALIZER)
../armnnDeserializer/Deserializer.cpp
)
- if(BUILD_BARE_METAL)
+ if(BUILD_BARE_METAL OR EXECUTE_NETWORK_STATIC)
add_library_ex(armnnSerializer STATIC ${armnn_serializer_sources})
else()
# We're going to export both a STATIC library and a SHARED library here.
@@ -52,9 +52,11 @@ if(BUILD_ARMNN_SERIALIZER)
target_include_directories(armnnSerializer PRIVATE ../armnn)
target_include_directories(armnnSerializer PRIVATE ../armnnUtils)
target_include_directories(armnnSerializer PRIVATE ../../generated)
- target_include_directories(armnnSerializer-static PRIVATE ../armnn)
- target_include_directories(armnnSerializer-static PRIVATE ../armnnUtils)
- target_include_directories(armnnSerializer-static PRIVATE ../../generated)
+ if (NOT BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
+ target_include_directories(armnnSerializer-static PRIVATE ../armnn)
+ target_include_directories(armnnSerializer-static PRIVATE ../armnnUtils)
+ target_include_directories(armnnSerializer-static PRIVATE ../../generated)
+ endif()
list(APPEND armnn_serializer_sources
ArmnnSchema_generated.h
@@ -64,12 +66,13 @@ if(BUILD_ARMNN_SERIALIZER)
target_include_directories(armnnSerializer SYSTEM PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(armnnSerializer armnn ${FLATBUFFERS_LIBRARY})
-
- install(TARGETS armnnSerializer-static
- EXPORT armnn-targets
- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
- ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
- )
+ if (NOT BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
+ install(TARGETS armnnSerializer-static
+ EXPORT armnn-targets
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ )
+ endif()
install(TARGETS armnnSerializer
EXPORT armnn-targets
diff --git a/src/armnnTestUtils/CMakeLists.txt b/src/armnnTestUtils/CMakeLists.txt
index 3f6fb415a2..a4333cf306 100755
--- a/src/armnnTestUtils/CMakeLists.txt
+++ b/src/armnnTestUtils/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -31,7 +31,7 @@ list(APPEND armnnTestUtils_sources
TestUtils.hpp
)
-if(NOT BUILD_BARE_METAL)
+if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
list(APPEND armnnTestUtils_sources
UnitTests.cpp
UnitTests.hpp
@@ -41,6 +41,8 @@ endif()
if(BUILD_BARE_METAL)
add_library_ex(armnnTestUtils STATIC ${armnnTestUtils_sources})
+elseif(EXECUTE_NETWORK_STATIC)
+ add_library_ex(armnnTestUtils OBJECT ${armnnTestUtils_sources})
else()
add_library_ex(armnnTestUtils SHARED ${armnnTestUtils_sources})
endif()
diff --git a/src/armnnTfLiteParser/CMakeLists.txt b/src/armnnTfLiteParser/CMakeLists.txt
index f9653b6752..6096d1bf8c 100755
--- a/src/armnnTfLiteParser/CMakeLists.txt
+++ b/src/armnnTfLiteParser/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 Arm Ltd. All rights reserved.
+# Copyright © 2017, 2023 Arm Ltd. All rights reserved.
# SPDX-License-Identifier: MIT
#
if(BUILD_TF_LITE_PARSER)
@@ -11,7 +11,11 @@ if(BUILD_TF_LITE_PARSER)
TfLiteParser.cpp
)
- add_library_ex(armnnTfLiteParser SHARED ${armnn_tf_lite_parser_sources})
+ if(EXECUTE_NETWORK_STATIC)
+ add_library_ex(armnnTfLiteParser OBJECT ${armnn_tf_lite_parser_sources})
+ else()
+ add_library_ex(armnnTfLiteParser SHARED ${armnn_tf_lite_parser_sources})
+ endif()
include_directories(SYSTEM "${FLATBUFFERS_INCLUDE_PATH}")
set_target_properties(armnnTfLiteParser PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 0484c6f478..f6c1ee9d38 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -316,6 +316,14 @@ std::vector<unsigned int> GetUIntBuffer(armnn::TensorInfo info,
::memcpy(uint64Buffer.data(), bufferPtr->data.data(), bufferPtr->data.size());
buffer.assign(std::begin(uint64Buffer), std::end(uint64Buffer));
}
+ else
+ {
+ CheckLocation location = CHECK_LOCATION();
+ throw ParseException(
+ fmt::format("Unsupported data type for uint buffer {}, only Signed 32 or Signed 64 are supported. {}",
+ GetDataTypeName(info.GetDataType()),
+ location.AsString()));
+ }
return buffer;
}
@@ -911,42 +919,16 @@ INetworkPtr TfLiteParserImpl::CreateNetworkFromModel()
return std::move(m_Network);
}
-std::unique_ptr<float[]> AsFloatArray(TfLiteParserImpl::BufferRawPtr bufferPtr,
- const TensorInfo& tensorInfo)
+bool TfLiteParserImpl::ShouldConstantTensorBeConverted(TfLiteParserImpl::TensorRawPtr tensorPtr,
+ armnn::DataType inputDataType,
+ armnn::DataType tensorDataType)
{
- if (tensorInfo.GetDataType() == DataType::QAsymmS8 || tensorInfo.GetDataType() == DataType::QSymmS8 ||
- tensorInfo.GetDataType() == DataType::QAsymmU8)
- {
- std::unique_ptr<float[]> buffer(new float[tensorInfo.GetNumElements()]);
-
- if (tensorInfo.HasPerAxisQuantization())
- {
- unsigned int axis = tensorInfo.GetQuantizationDim().value();
- auto axisDimensionality = tensorInfo.GetShape()[axis];
- auto axisFactor = armnnUtils::GetNumElementsAfter(tensorInfo.GetShape(), axis);
-
- for (unsigned int i = 0; i < tensorInfo.GetNumDimensions(); ++i)
- {
- unsigned int axisIndex = (i / axisFactor) % axisDimensionality;
- buffer[i] = Dequantize<int8_t>(bufferPtr->data[i], tensorInfo.GetQuantizationScales()[axisIndex],
- tensorInfo.GetQuantizationOffset());
- }
- }
- else
- {
- for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i)
- {
- buffer[i] = Dequantize<int8_t>(bufferPtr->data[i], tensorInfo.GetQuantizationScale(),
- tensorInfo.GetQuantizationOffset());
- }
- }
- return buffer;
- }
- throw ParseException(
- fmt::format("Unsupported input/weights combination: Input {} not supported with Weights {}",
- GetDataTypeName(DataType::Float32),
- GetDataTypeName(tensorInfo.GetDataType()),
- CHECK_LOCATION().AsString()));
+ return (TfLiteParserImpl::IsConstTensor(tensorPtr) && inputDataType == DataType::Float32 &&
+ (tensorDataType == DataType::QAsymmU8 ||
+ tensorDataType == DataType::QAsymmS8 ||
+ tensorDataType == DataType::QSymmS8 ||
+ tensorDataType == DataType::Signed32 ||
+ tensorDataType == DataType::Signed64));
}
void TfLiteParserImpl::RegisterProducerOfTensor(size_t subgraphIndex,
@@ -1136,9 +1118,7 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex)
auto layerName = fmt::format("Conv2D:{}:{}", subgraphIndex, operatorIndex);
armnn::IConnectableLayer* layer = m_Network->AddConvolution2dLayer(desc, layerName.c_str());
- if (IsConstTensor(inputs[1]) && inputTensorInfo.GetDataType() == DataType::Float32 &&
- (filterTensorInfo.GetDataType() == DataType::QAsymmU8 ||
- filterTensorInfo.GetDataType() == DataType::QAsymmS8))
+ if (ShouldConstantTensorBeConverted(inputs[1], inputTensorInfo.GetDataType(), filterTensorInfo.GetDataType()))
{
m_ConstantsToDequantize.emplace_back(inputs[1]->buffer);
}
@@ -1150,9 +1130,7 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex)
// Add the biases input to the registration list, a constant layer will be added by SetupConstantLayers.
tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]);
- if (IsConstTensor(inputs[2]) && inputTensorInfo.GetDataType() == DataType::Float32 &&
- (filterTensorInfo.GetDataType() == DataType::QAsymmU8 ||
- filterTensorInfo.GetDataType() == DataType::QAsymmS8))
+ if (ShouldConstantTensorBeConverted(inputs[2], inputTensorInfo.GetDataType(), biasTensorInfo.GetDataType()))
{
m_ConstantsToDequantize.emplace_back(inputs[2]->buffer);
}
@@ -3112,9 +3090,7 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator
// Add the weights input to the registration list, constant layers will be added by SetupConstantLayers if constant.
tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]);
- if (desc.m_ConstantWeights && inputTensorInfo.GetDataType() == DataType::Float32 &&
- (filterTensorInfo.GetDataType() == DataType::QAsymmU8 ||
- filterTensorInfo.GetDataType() == DataType::QAsymmS8))
+ if (ShouldConstantTensorBeConverted(inputs[1], inputTensorInfo.GetDataType(), filterTensorInfo.GetDataType()))
{
m_ConstantsToDequantize.emplace_back(inputs[1]->buffer);
}
@@ -3127,9 +3103,7 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator
// Add the biases input to the registration list, constant layer will be added by SetupConstantLayers.
tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]);
- if (desc.m_ConstantWeights && inputTensorInfo.GetDataType() == DataType::Float32 &&
- (biasTensorInfo.GetDataType() == DataType::QAsymmU8 ||
- biasTensorInfo.GetDataType() == DataType::QAsymmS8))
+ if (ShouldConstantTensorBeConverted(inputs[2], inputTensorInfo.GetDataType(), biasTensorInfo.GetDataType()))
{
m_ConstantsToDequantize.emplace_back(inputs[2]->buffer);
}
@@ -4925,11 +4899,22 @@ TfLiteParserImpl::CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
// Make sure isConstant flag is set.
tensorInfo.SetConstant();
- if (inputDataType == DataType::Float32 && tensorInfo.GetDataType() != DataType::Float32)
+ if (inputDataType == DataType::Float32 && tensorInfo.GetDataType() != DataType::Float32)
{
- TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true);
- std::unique_ptr<float[]> data = AsFloatArray(bufferPtr, tensorInfo);
- return std::make_pair(ConstTensor(constTensorInfo, data.get()), std::move(data));
+ try
+ {
+ TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true);
+ std::unique_ptr<float[]> data = armnnUtils::ToFloatArray(bufferPtr->data, tensorInfo);
+ return std::make_pair(ConstTensor(constTensorInfo, data.get()), std::move(data));
+ }
+ catch (InvalidArgumentException&)
+ {
+ throw ParseException(
+ fmt::format("Unsupported input/weights combination: Input {} not supported with Weights {}",
+ GetDataTypeName(DataType::Float32),
+ GetDataTypeName(tensorInfo.GetDataType()),
+ CHECK_LOCATION().AsString()));
+ }
}
else
{
@@ -4950,9 +4935,20 @@ TfLiteParserImpl::CreateConstTensorPtr(TensorRawPtr tensorPtr, armnn::TensorInfo
if (inputTensorInfo.GetDataType() == DataType::Float32 && tensorInfo.GetDataType() != DataType::Float32)
{
- TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true);
- std::unique_ptr<float[]> data = AsFloatArray(bufferPtr, tensorInfo);
- return std::make_pair(new ConstTensor(constTensorInfo, data.get()), std::move(data));
+ try
+ {
+ TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true);
+ std::unique_ptr<float[]> data = armnnUtils::ToFloatArray(bufferPtr->data, tensorInfo);
+ return std::make_pair(new ConstTensor(constTensorInfo, data.get()), std::move(data));
+ }
+ catch (InvalidArgumentException&)
+ {
+ throw ParseException(
+ fmt::format("Unsupported input/weights combination: Input {} not supported with Weights {}",
+ GetDataTypeName(DataType::Float32),
+ GetDataTypeName(tensorInfo.GetDataType()),
+ CHECK_LOCATION().AsString()));
+ }
}
else
{
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index f8ddc55649..7eb6c48501 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -242,7 +242,13 @@ private:
};
bool ShouldConstantTensorBeCreated(unsigned int tensorIndex);
+
bool IsConstTensor(TensorRawPtr tensorPtr);
+
+ bool ShouldConstantTensorBeConverted(TfLiteParserImpl::TensorRawPtr tensorPtr,
+ armnn::DataType inputDataType,
+ armnn::DataType filterDataType);
+
armnn::ConstTensor CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
armnn::TensorInfo& tensorInfo);
@@ -250,6 +256,7 @@ private:
CreateConstTensorPermuted(TensorRawPtr tensorPtr,
armnn::TensorInfo& tensorInfo,
armnn::Optional<armnn::PermutationVector&> permutationVector);
+
std::pair<armnn::ConstTensor, std::unique_ptr<float[]>>
CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
armnn::TensorInfo& tensorInfo,
@@ -261,6 +268,7 @@ private:
TfLiteParserImpl::TensorRawPtr tensorPtr,
armnn::TensorInfo& tensorInfo,
armnn::Optional<armnn::PermutationVector&> permutationVector);
+
std::pair<armnn::ConstTensor*, std::unique_ptr<float[]>>
CreateConstTensorPtr(TensorRawPtr tensorPtr,
armnn::TensorInfo& inputTensorInfo);
diff --git a/src/armnnTfLiteParser/test/Conv2D.cpp b/src/armnnTfLiteParser/test/Conv2D.cpp
index 45c4a43519..334c102344 100644
--- a/src/armnnTfLiteParser/test/Conv2D.cpp
+++ b/src/armnnTfLiteParser/test/Conv2D.cpp
@@ -673,7 +673,7 @@ struct Conv2FloatWithInt8WeightsAndBiasesFixture : Conv2DWithBiasesFixture
"[ 1, 2, 2, 1 ]", // filterShape
"[ 2,1, 0,6 ]", // filterData
"[ 1 ]", // biasShape
- "[ 10, 0, 0, 0 ]", // biasData
+ "[ 10 ]", // biasData
"1", // stride w and h
"NONE", // activation
"1.0", // filterScale
diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp
index d77f5d74c3..9e3d719211 100644
--- a/src/armnnUtils/TensorUtils.cpp
+++ b/src/armnnUtils/TensorUtils.cpp
@@ -128,12 +128,11 @@ TensorShape ExpandDims(const TensorShape& tensorShape, int axis)
}
outputShape.insert(outputShape.begin() + axis, 1);
- return TensorShape(outputDim, outputShape.data());
+ return { outputDim, outputShape.data() };
}
std::vector<unsigned int> SqueezeDims(const TensorShape& tensorShape)
{
- unsigned int outputDimSize = 0;
std::vector<unsigned int> squeezedDims;
for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i)
@@ -141,7 +140,6 @@ std::vector<unsigned int> SqueezeDims(const TensorShape& tensorShape)
if (tensorShape[i] != 1)
{
squeezedDims.push_back(tensorShape[i]);
- ++outputDimSize;
}
}
return squeezedDims;
@@ -201,4 +199,91 @@ std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::Tensor
return { axisFactor, scales };
}
+template<typename PrimitiveType>
+void CheckSizes(const std::vector<PrimitiveType>& data, const armnn::TensorInfo& tensorInfo, unsigned int size = 1)
+{
+ if (data.size() / size != tensorInfo.GetNumElements())
+ {
+ throw InvalidArgumentException(
+ fmt::format("The data does not contain the expected number of elements {} != {}. {}",
+ data.size(), tensorInfo.GetNumElements(), CHECK_LOCATION().AsString()));
+ }
+}
+
+template<typename PrimitiveType>
+std::unique_ptr<float[]> ToFloatArray(const std::vector<PrimitiveType>& data, const armnn::TensorInfo& tensorInfo)
+{
+ CheckSizes(data, tensorInfo);
+
+ std::unique_ptr<float[]> returnBuffer(new float[tensorInfo.GetNumElements()]);
+
+ if (tensorInfo.HasPerAxisQuantization())
+ {
+ unsigned int axis = tensorInfo.GetQuantizationDim().value();
+ auto axisDimensionality = tensorInfo.GetShape()[axis];
+ auto axisFactor = armnnUtils::GetNumElementsAfter(tensorInfo.GetShape(), axis);
+
+ for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i)
+ {
+ unsigned int axisIndex;
+
+ if (i < axisFactor)
+ {
+ axisIndex = 0;
+ }
+ else
+ {
+ axisIndex = (i / axisFactor) % axisDimensionality;
+ }
+ returnBuffer[i] = Dequantize<PrimitiveType>(data[i],
+ tensorInfo.GetQuantizationScales()[axisIndex],
+ tensorInfo.GetQuantizationOffset());
+ }
+ }
+ else
+ {
+ for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i)
+ {
+ returnBuffer[i] = Dequantize<PrimitiveType>(data[i],
+ tensorInfo.GetQuantizationScale(),
+ tensorInfo.GetQuantizationOffset());
+ }
+ }
+ return returnBuffer;
+}
+
+std::unique_ptr<float[]> ToFloatArray(const std::vector<uint8_t>& data, const armnn::TensorInfo& tensorInfo)
+{
+ if (tensorInfo.GetDataType() == DataType::QAsymmS8 || tensorInfo.GetDataType() == DataType::QSymmS8)
+ {
+ CheckSizes(data, tensorInfo);
+ std::vector<int8_t> buffer(tensorInfo.GetNumElements());
+ ::memcpy(buffer.data(), data.data(), data.size());
+ return ToFloatArray<int8_t>(buffer, tensorInfo);
+ }
+ else if (tensorInfo.GetDataType() == DataType::QAsymmU8)
+ {
+ CheckSizes(data, tensorInfo);
+ return ToFloatArray<uint8_t>(data, tensorInfo);
+ }
+ else if (tensorInfo.GetDataType() == DataType::Signed32)
+ {
+ CheckSizes(data, tensorInfo, 4);
+ std::vector<int32_t> buffer(tensorInfo.GetNumElements());
+ ::memcpy(buffer.data(), data.data(), data.size());
+ return ToFloatArray<int32_t>(buffer, tensorInfo);
+ }
+ else if (tensorInfo.GetDataType() == DataType::Signed64)
+ {
+ CheckSizes(data, tensorInfo, 8);
+ std::vector<int64_t> buffer(tensorInfo.GetNumElements());
+ ::memcpy(buffer.data(), data.data(), data.size());
+ return ToFloatArray<int64_t>(buffer, tensorInfo);
+ }
+ throw InvalidArgumentException(
+ fmt::format("Unsupported datatype {}. {}",
+ GetDataTypeName(tensorInfo.GetDataType()),
+ CHECK_LOCATION().AsString()));
+}
+
} // namespace armnnUtils
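
The new armnnUtils::ToFloatArray helper replaces the parser-local AsFloatArray: it dequantizes a raw byte buffer into floats using the quantization parameters carried by the TensorInfo, and throws InvalidArgumentException for unsupported data types. A minimal sketch of calling it, assuming the declaration is exposed via armnnUtils/TensorUtils.hpp; the shape, scale and data values are illustrative only:

#include <armnnUtils/TensorUtils.hpp>

#include <iostream>
#include <memory>
#include <vector>

int main()
{
    // A 2x2 QAsymmU8 tensor with scale 0.1 and offset 0 (illustrative values).
    armnn::TensorInfo info({ 2, 2 }, armnn::DataType::QAsymmU8, 0.1f);
    std::vector<uint8_t> raw { 10, 20, 30, 40 };

    // Dequantize the raw bytes into a float buffer owned by the caller.
    std::unique_ptr<float[]> floats = armnnUtils::ToFloatArray(raw, info);

    for (unsigned int i = 0; i < info.GetNumElements(); ++i)
    {
        std::cout << floats[i] << " ";   // prints: 1 2 3 4
    }
    return 0;
}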
diff --git a/src/armnnUtils/test/TensorUtilsTest.cpp b/src/armnnUtils/test/TensorUtilsTest.cpp
index 6d5f719eb1..16349c554e 100644
--- a/src/armnnUtils/test/TensorUtilsTest.cpp
+++ b/src/armnnUtils/test/TensorUtilsTest.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2019 Arm Ltd. All rights reserved.
+// Copyright © 2019,2021-2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -134,4 +134,175 @@ TEST_CASE("ExpandDimsInvalidNegativeAxisTest")
CHECK_THROWS_AS(ExpandDims(inputShape, -5), armnn::InvalidArgumentException);
}
+TEST_CASE("ToFloatArrayInvalidDataType")
+{
+ armnn::TensorInfo info({ 2, 3, 4 }, armnn::DataType::BFloat16);
+ std::vector<uint8_t> data {1,2,3,4,5,6,7,8,9,10};
+
+ // Invalid argument
+ CHECK_THROWS_AS(ToFloatArray(data, info), armnn::InvalidArgumentException);
+}
+
+TEST_CASE("ToFloatArrayQSymmS8PerAxis")
+{
+ std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+ unsigned int quantizationDim = 1;
+
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QSymmS8, quantizationScales, quantizationDim);
+ std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+ float expected[] { 10.0f, 24.0f, -37.8f, -46.4f, -10.6f, -19.2f, -25.8f, -30.4f, -6.6f, -11.2f, -13.8f, -14.4f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArrayQSymmS8")
+{
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QSymmS8, 0.1f);
+ std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+ float expected[] { 10.0f, 12.0f, -12.6f, -11.6f, -10.6f, -9.6f, -8.6f, -7.6f, -6.6f, -5.6f, -4.6f, -3.6f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArrayQAsymmS8PerAxis")
+{
+ std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+ unsigned int quantizationDim = 1;
+
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmS8, quantizationScales, quantizationDim);
+ std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+ float expected[] { 10.0f, 24.0f, -37.8f, -46.4f, -10.6f, -19.2f, -25.8f, -30.4f, -6.6f, -11.2f, -13.8f, -14.4f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArrayQAsymmS8")
+{
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmS8, 0.1f);
+ std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+ float expected[] { 10.0f, 12.0f, -12.6f, -11.6f, -10.6f, -9.6f, -8.6f, -7.6f, -6.6f, -5.6f, -4.6f, -3.6f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArrayQASymmU8PerAxis")
+{
+ std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+ unsigned int quantizationDim = 1;
+
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmU8, quantizationScales, quantizationDim);
+ std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220 };
+ float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArrayQAsymmU8")
+{
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmU8, 0.1f);
+ std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220 };
+ float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArraySigned32PerAxis")
+{
+ std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+ unsigned int quantizationDim = 1;
+
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed32, quantizationScales, quantizationDim);
+ std::vector<uint8_t> data { 100, 0, 0, 0, 120, 0, 0, 0, 130, 0, 0, 0, 140, 0, 0, 0, 150, 0, 0, 0, 160, 0, 0, 0,
+ 170, 0, 0, 0, 180, 0, 0, 0, 190, 0, 0, 0, 200, 0, 0, 0, 210, 0, 0, 0, 220, 0, 0, 0 };
+ float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArraySigned32")
+{
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed32, 0.1f);
+ std::vector<uint8_t> data { 100, 0, 0, 0, 120, 0, 0, 0, 130, 0, 0, 0, 140, 0, 0, 0, 150, 0, 0, 0, 160, 0, 0, 0,
+ 170, 0, 0, 0, 180, 0, 0, 0, 190, 0, 0, 0, 200, 0, 0, 0, 210, 0, 0, 0, 220, 0, 0, 0 };
+ float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArraySigned64PerAxis")
+{
+ std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+ unsigned int quantizationDim = 1;
+
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed64, quantizationScales, quantizationDim);
+ std::vector<uint8_t> data { 100, 0, 0, 0, 0, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0, 0, 0, 0,
+ 140, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0,
+ 170, 0, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0,
+ 200, 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0 };
+ float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
+
+TEST_CASE("ToFloatArraySigned64")
+{
+ armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed64, 0.1f);
+ std::vector<uint8_t> data { 100, 0, 0, 0, 0, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0, 0, 0, 0,
+ 140, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0,
+ 170, 0, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0,
+ 200, 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0 };
+ float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f };
+
+ std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+ for (uint i = 0; i < info.GetNumElements(); ++i)
+ {
+ CHECK_EQ(result[i], doctest::Approx(expected[i]));
+ }
+}
}
diff --git a/src/backends/aclCommon/ArmComputeTuningUtils.cpp b/src/backends/aclCommon/ArmComputeTuningUtils.cpp
new file mode 100644
index 0000000000..4680541ae5
--- /dev/null
+++ b/src/backends/aclCommon/ArmComputeTuningUtils.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmComputeTuningUtils.hpp"
+
+namespace armnn
+{
+
+IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(IGpuAccTunedParameters::Mode mode,
+ IGpuAccTunedParameters::TuningLevel tuningLevel)
+{
+ return new ClTunedParameters(mode, tuningLevel);
+}
+
+IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(IGpuAccTunedParameters::Mode mode,
+ IGpuAccTunedParameters::TuningLevel tuningLevel)
+{
+ return IGpuAccTunedParametersPtr(CreateRaw(mode, tuningLevel), &IGpuAccTunedParameters::Destroy);
+}
+
+void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params)
+{
+ delete params;
+}
+
+ClTunedParameters::ClTunedParameters(IGpuAccTunedParameters::Mode mode,
+ IGpuAccTunedParameters::TuningLevel tuningLevel)
+ : m_Mode(mode)
+ , m_TuningLevel(tuningLevel)
+ , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters)
+{
+}
+
+void ClTunedParameters::Load(const char* filename)
+{
+ try
+ {
+ m_Tuner.load_from_file(filename);
+ }
+ catch (const std::exception& e)
+ {
+ throw Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + e.what());
+ }
+}
+
+void ClTunedParameters::Save(const char* filename) const
+{
+ try
+ {
+ m_Tuner.save_to_file(filename);
+ }
+ catch (const std::exception& e)
+ {
+ throw Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + e.what());
+ }
+}
+
+}
\ No newline at end of file
diff --git a/src/backends/aclCommon/ArmComputeTuningUtils.hpp b/src/backends/aclCommon/ArmComputeTuningUtils.hpp
new file mode 100644
index 0000000000..6d99d3f08e
--- /dev/null
+++ b/src/backends/aclCommon/ArmComputeTuningUtils.hpp
@@ -0,0 +1,84 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/BackendOptions.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/Logging.hpp>
+
+#include <arm_compute/runtime/CL/CLTuner.h>
+#include <arm_compute/runtime/CL/CLTunerTypes.h>
+#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h>
+
+namespace armnn
+{
+
+enum class TuningLevel
+{
+ None,
+ Rapid,
+ Normal,
+ Exhaustive
+};
+
+inline TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defaultValue)
+{
+ if (value.IsInt())
+ {
+ int v = value.AsInt();
+ if (v > static_cast<int>(TuningLevel::Exhaustive) ||
+ v < static_cast<int>(TuningLevel::None))
+ {
+ ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
+ "Using default(" << static_cast<int>(defaultValue) << ")";
+ } else
+ {
+ return static_cast<TuningLevel>(v);
+ }
+ }
+ return defaultValue;
+}
+
+inline void ConfigureTuner(arm_compute::CLTuner &tuner, TuningLevel level)
+{
+ tuner.set_tune_new_kernels(true); // Turn on tuning initially.
+
+ switch (level)
+ {
+ case TuningLevel::Rapid:
+ ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
+ tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
+ break;
+ case TuningLevel::Normal:
+ ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
+ tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
+ break;
+ case TuningLevel::Exhaustive:
+ ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
+ tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
+ break;
+ case TuningLevel::None:
+ default:
+ tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
+ break;
+ }
+}
+
+class ClTunedParameters : public IGpuAccTunedParameters
+{
+public:
+ ClTunedParameters(IGpuAccTunedParameters::Mode mode, IGpuAccTunedParameters::TuningLevel tuningLevel);
+
+ virtual void Load(const char* filename);
+ virtual void Save(const char* filename) const;
+
+ Mode m_Mode;
+ TuningLevel m_TuningLevel;
+
+ arm_compute::CLTuner m_Tuner;
+ arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle;
+};
+
+}
\ No newline at end of file
diff --git a/src/backends/aclCommon/CMakeLists.txt b/src/backends/aclCommon/CMakeLists.txt
index 05fbe6cca9..b3bf89e750 100644
--- a/src/backends/aclCommon/CMakeLists.txt
+++ b/src/backends/aclCommon/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 Arm Ltd. All rights reserved.
+# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -8,9 +8,12 @@ list(APPEND armnnAclCommon_sources
ArmComputeTensorHandle.hpp
ArmComputeTensorUtils.hpp
ArmComputeTensorUtils.cpp
+ ArmComputeTuningUtils.hpp
+ ArmComputeTuningUtils.cpp
ArmComputeUtils.hpp
BaseMemoryManager.cpp
BaseMemoryManager.hpp
+ IClTensorHandle.hpp
)
if(BUILD_UNIT_TESTS)
diff --git a/src/backends/cl/IClTensorHandle.hpp b/src/backends/aclCommon/IClTensorHandle.hpp
index 48cf5f57d6..48cf5f57d6 100644
--- a/src/backends/cl/IClTensorHandle.hpp
+++ b/src/backends/aclCommon/IClTensorHandle.hpp
diff --git a/src/backends/aclCommon/common.mk b/src/backends/aclCommon/common.mk
index 0ba966af14..b113269df9 100644
--- a/src/backends/aclCommon/common.mk
+++ b/src/backends/aclCommon/common.mk
@@ -9,6 +9,7 @@
COMMON_SOURCES := \
ArmComputeTensorUtils.cpp \
+ ArmComputeTuningUtils.cpp \
BaseMemoryManager.cpp
# COMMON_TEST_SOURCES contains the list of files to be included
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index b2ab932384..8d7e114fa5 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -1,9 +1,9 @@
#
-# Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
-if(NOT BUILD_BARE_METAL)
+if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
list(APPEND armnnBackendsCommon_sources
DynamicBackend.cpp
DynamicBackend.hpp
diff --git a/src/backends/backendsCommon/test/BackendProfilingTests.cpp b/src/backends/backendsCommon/test/BackendProfilingTests.cpp
index d49fa7f2ec..9041b55c57 100644
--- a/src/backends/backendsCommon/test/BackendProfilingTests.cpp
+++ b/src/backends/backendsCommon/test/BackendProfilingTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -169,11 +169,6 @@ TEST_CASE("BackendProfilingCounterRegisterMockBackendTest")
unsigned int shiftedId = 0;
- if (armnn::BackendRegistryInstance().IsBackendRegistered("EthosNAcc"))
- {
- shiftedId = 4;
- }
-
// Check if the MockBackends 3 dummy counters {0, 1, 2-5 (four cores)} are registered
armnn::BackendId mockId = armnn::MockBackendId();
const ICounterMappings& counterMap = GetProfilingService(&runtime).GetCounterMappings();
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 5fcc8b592e..d251bd2597 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017-2022 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -41,6 +41,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources
LogSoftmaxEndToEndTestImpl.hpp
MemoryManagerTests.cpp
MockBackendId.hpp
+ MultiplicationEndToEndTestImpl.hpp
OptimizeSubgraphViewTests.cpp
OptimizationViewsTests.cpp
PreluEndToEndTestImpl.hpp
@@ -57,6 +58,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources
SpaceToDepthEndToEndTestImpl.hpp
SplitterEndToEndTestImpl.hpp
StridedSliceAsyncEndToEndTest.hpp
+ SubtractionEndToEndTestImpl.hpp
TransposeEndToEndTestImpl.hpp
TensorCopyUtils.hpp
WorkloadFactoryHelper.hpp
diff --git a/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp
new file mode 100644
index 0000000000..40442e2d47
--- /dev/null
+++ b/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/INetwork.hpp>
+
+#include <CommonTestUtils.hpp>
+#include <ResolveType.hpp>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+template<typename armnn::DataType DataType>
+armnn::INetworkPtr CreateMultiplicationNetwork(const armnn::TensorShape& inputXShape,
+ const armnn::TensorShape& inputYShape,
+ const armnn::TensorShape& outputShape,
+ const float qScale = 1.0f,
+ const int32_t qOffset = 0)
+{
+ using namespace armnn;
+
+ INetworkPtr network(INetwork::Create());
+
+ TensorInfo inputXTensorInfo(inputXShape, DataType, qScale, qOffset, true);
+ TensorInfo inputYTensorInfo(inputYShape, DataType, qScale, qOffset, true);
+
+ TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset);
+
+
+ IConnectableLayer* multiplication = network->AddMultiplicationLayer("multiplication");
+ IConnectableLayer* inputX = network->AddInputLayer(0, "inputX");
+ IConnectableLayer* inputY = network->AddInputLayer(1, "inputY");
+ IConnectableLayer* output = network->AddOutputLayer(0, "output");
+
+ Connect(inputX, multiplication, inputXTensorInfo, 0, 0);
+ Connect(inputY, multiplication, inputYTensorInfo, 0, 1);
+ Connect(multiplication, output, outputTensorInfo, 0, 0);
+
+ return network;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void MultiplicationEndToEnd(const std::vector<armnn::BackendId>& backends)
+{
+ using namespace armnn;
+
+ const TensorShape& inputXShape = { 2, 2 };
+ const TensorShape& inputYShape = { 2, 2 };
+ const TensorShape& outputShape = { 2, 2 };
+
+ INetworkPtr network = CreateMultiplicationNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+
+ CHECK(network);
+
+ std::vector<T> inputXData{ 1, 2, 3, 4 };
+ std::vector<T> inputYData{ 5, 2, 6, 3 };
+ std::vector<T> expectedOutput{ 5, 4, 18, 12 };
+
+ std::map<int, std::vector<T>> inputTensorData = {{ 0, inputXData }, {1, inputYData}};
+ std::map<int, std::vector<T>> expectedOutputData = { { 0, expectedOutput } };
+
+ EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+template<armnn::DataType ArmnnType>
+void MultiplicationEndToEndFloat16(const std::vector<armnn::BackendId>& backends)
+{
+ using namespace armnn;
+ using namespace half_float::literal;
+ using Half = half_float::half;
+
+ const TensorShape& inputXShape = { 2, 2 };
+ const TensorShape& inputYShape = { 2, 2 };
+ const TensorShape& outputShape = { 2, 2 };
+
+ INetworkPtr network = CreateMultiplicationNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+ CHECK(network);
+
+ std::vector<Half> inputXData{ 1._h, 2._h,
+ 3._h, 4._h };
+ std::vector<Half> inputYData{ 1._h, 2._h,
+ 3._h, 4._h };
+ std::vector<Half> expectedOutput{ 1._h, 4._h,
+ 9._h, 16._h };
+
+ std::map<int, std::vector<Half>> inputTensorData = {{ 0, inputXData }, { 1, inputYData }};
+ std::map<int, std::vector<Half>> expectedOutputData = { { 0, expectedOutput } };
+
+ EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+} // anonymous namespace
diff --git a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
index 9b86784dce..ff84eea2de 100644
--- a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
+++ b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2019-2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -288,4 +288,15 @@ TEST_CASE("OptimizeViewsValidateDeviceMockBackend")
CheckLayers(graph);
}
+TEST_CASE("OptimizedViewsReturnsINetworkReference")
+{
+ OptimizationViews view;
+
+ auto layer = view.GetINetworkRef().AddInputLayer(0, "input");
+
+ // Check layer has been added to the referenced INetwork
+ CHECK(layer);
+}
+
+
}
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp
new file mode 100644
index 0000000000..747fe26df0
--- /dev/null
+++ b/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/INetwork.hpp>
+
+#include <CommonTestUtils.hpp>
+#include <ResolveType.hpp>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+template<typename armnn::DataType DataType>
+armnn::INetworkPtr CreateSubtractionNetwork(const armnn::TensorShape& inputXShape,
+ const armnn::TensorShape& inputYShape,
+ const armnn::TensorShape& outputShape,
+ const float qScale = 1.0f,
+ const int32_t qOffset = 0)
+{
+ using namespace armnn;
+
+ INetworkPtr network(INetwork::Create());
+
+ TensorInfo inputXTensorInfo(inputXShape, DataType, qScale, qOffset, true);
+ TensorInfo inputYTensorInfo(inputYShape, DataType, qScale, qOffset, true);
+
+ TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset);
+
+
+ IConnectableLayer* subtraction = network->AddSubtractionLayer("subtraction");
+ IConnectableLayer* inputX = network->AddInputLayer(0, "inputX");
+ IConnectableLayer* inputY = network->AddInputLayer(1, "inputY");
+ IConnectableLayer* output = network->AddOutputLayer(0, "output");
+
+ Connect(inputX, subtraction, inputXTensorInfo, 0, 0);
+ Connect(inputY, subtraction, inputYTensorInfo, 0, 1);
+ Connect(subtraction, output, outputTensorInfo, 0, 0);
+
+ return network;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void SubtractionEndToEnd(const std::vector<armnn::BackendId>& backends)
+{
+ using namespace armnn;
+
+ const TensorShape& inputXShape = { 2, 2 };
+ const TensorShape& inputYShape = { 2, 2 };
+ const TensorShape& outputShape = { 2, 2 };
+
+ INetworkPtr network = CreateSubtractionNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+
+ CHECK(network);
+
+ std::vector<T> inputXData{ 10, 11, 12, 13 };
+ std::vector<T> inputYData{ 5, 7, 6, 8 };
+ std::vector<T> expectedOutput{ 5, 4, 6, 5 };
+
+ std::map<int, std::vector<T>> inputTensorData = {{ 0, inputXData }, {1, inputYData}};
+ std::map<int, std::vector<T>> expectedOutputData = { { 0, expectedOutput } };
+
+ EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+template<armnn::DataType ArmnnType>
+void SubtractionEndToEndFloat16(const std::vector<armnn::BackendId>& backends)
+{
+ using namespace armnn;
+ using namespace half_float::literal;
+ using Half = half_float::half;
+
+ const TensorShape& inputXShape = { 2, 2 };
+ const TensorShape& inputYShape = { 2, 2 };
+ const TensorShape& outputShape = { 2, 2 };
+
+ INetworkPtr network = CreateSubtractionNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+ CHECK(network);
+
+ std::vector<Half> inputXData{ 11._h, 12._h,
+ 13._h, 14._h };
+ std::vector<Half> inputYData{ 5._h, 7._h,
+ 6._h, 8._h };
+ std::vector<Half> expectedOutput{ 6._h, 5._h,
+ 7._h, 6._h };
+
+ std::map<int, std::vector<Half>> inputTensorData = {{ 0, inputXData }, { 1, inputYData }};
+ std::map<int, std::vector<Half>> expectedOutputData = { { 0, expectedOutput } };
+
+ EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+} // anonymous namespace
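Both new headers follow the pattern of the existing end-to-end test implementations: build a small two-input network, run it through EndToEndLayerTestImpl, and compare against hard-coded expected values. As a rough usage sketch (illustrative only, not part of this patch; the test names and backend choice are assumptions), a backend test file such as ClEndToEndTests.cpp would instantiate the helpers per data type:

    #include "MultiplicationEndToEndTestImpl.hpp"
    #include "SubtractionEndToEndTestImpl.hpp"

    // Hypothetical test cases exercising the new helpers on the GpuAcc backend.
    TEST_CASE("ClMultiplicationEndToEndFloat32Sketch")
    {
        std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
        MultiplicationEndToEnd<armnn::DataType::Float32>(backends);
    }

    TEST_CASE("ClSubtractionEndToEndFloat16Sketch")
    {
        std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
        SubtractionEndToEndFloat16<armnn::DataType::Float16>(backends);
    }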
diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt
index aeb90b069c..20c42061fc 100644
--- a/src/backends/cl/CMakeLists.txt
+++ b/src/backends/cl/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 Arm Ltd. All rights reserved.
+# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -44,7 +44,6 @@ if(ARMCOMPUTECL)
ClTensorHandleFactory.hpp
ClWorkloadFactory.cpp
ClWorkloadFactory.hpp
- IClTensorHandle.hpp
ICLTensorProxy.hpp
OpenClTimer.cpp
OpenClTimer.hpp
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index 62c6b038da..adee2763ba 100644
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -20,20 +20,11 @@ namespace armnn
struct ClBackendContext::ClContextControlWrapper
{
- ClContextControlWrapper() {}
-
- bool IsInitialised()
- {
- return m_Initialised;
- }
-
- void Init(arm_compute::CLTuner* tuner,
- arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
- bool profilingEnabled)
- {
- m_ClContextControl = ClContextControl(tuner, heuristicsHandle, profilingEnabled);
- m_Initialised = true;
- }
+ ClContextControlWrapper(arm_compute::CLTuner* tuner,
+ arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
+ bool profilingEnabled)
+ : m_ClContextControl(tuner, heuristicsHandle, profilingEnabled)
+ {}
bool Sync()
{
@@ -62,106 +53,12 @@ struct ClBackendContext::ClContextControlWrapper
{
// There are no loaded networks left, so clear the CL cache to free up memory
m_ClContextControl.ClearClCache();
- m_Initialised = false;
}
}
-private:
- bool m_Initialised;
ClContextControl m_ClContextControl;
-
};
-/**
- * Returns a shared_ptr to the CLContextControlWrapper. This wraps the CLContextControl and ensures that we only create
- * and use one at a time.
- */
-std::shared_ptr<ClBackendContext::ClContextControlWrapper> ClBackendContext::Get()
-{
- static std::shared_ptr<ClBackendContext::ClContextControlWrapper> instance
- = std::make_shared<ClBackendContext::ClContextControlWrapper>();
- // Instantiated on first use.
- return instance;
-}
-
-std::string LowerString(std::string value)
-{
- std::transform(value.begin(), value.end(), value.begin(),
- [](unsigned char c){ return std::tolower(c); });
-
- return value;
-}
-
-enum class TuningLevel
-{
- None,
- Rapid,
- Normal,
- Exhaustive
-};
-
-
-TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defaultValue)
-{
- if (value.IsInt())
- {
- int v = value.AsInt();
- if (v > static_cast<int>(TuningLevel::Exhaustive) ||
- v < static_cast<int>(TuningLevel::None))
- {
- ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
- "Using default(" << static_cast<int>(defaultValue) << ")";
- } else
- {
- return static_cast<TuningLevel>(v);
- }
- }
- return defaultValue;
-}
-
-bool ParseBoolean(const BackendOptions::Var& value, bool defaultValue)
-{
- if (value.IsBool())
- {
- return value.AsBool();
- }
- return defaultValue;
-}
-
-std::string ParseFile(const BackendOptions::Var& value, std::string defaultValue)
-{
- if (value.IsString())
- {
- return value.AsString();
- }
- return defaultValue;
-}
-
-void ConfigureTuner(arm_compute::CLTuner &tuner, TuningLevel level)
-{
- tuner.set_tune_new_kernels(true); // Turn on tuning initially.
-
- switch (level)
- {
- case TuningLevel::Rapid:
- ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
- tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
- break;
- case TuningLevel::Normal:
- ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
- tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
- break;
- case TuningLevel::Exhaustive:
- ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
- tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
- break;
- case TuningLevel::None:
- default:
- tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
- break;
- }
-}
-
ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
: IBackendContext(options)
, m_TuningFile()
@@ -171,7 +68,6 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
arm_compute::CLTuner* tuner = nullptr;
arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr;
bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
-
if (useLegacyTunerAPI)
{
auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>(
@@ -217,17 +113,17 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
{
if (name == "KernelProfilingEnabled")
{
- kernelProfiling |= ParseBoolean(value, false);
+ kernelProfiling |= ParseBooleanBackendOption(value, false);
} else if (name == "TuningFile")
{
- m_TuningFile = ParseFile(value, "");
+ m_TuningFile = ParseStringBackendOption(value, "");
} else if (name == "TuningLevel")
{
tuningLevel = ParseTuningLevel(value, defaultTuningLevel);
}
else if (name == "MLGOTuningFilePath")
{
- m_MLGOTuningFile = ParseFile(value, "");
+ m_MLGOTuningFile = ParseStringBackendOption(value, "");
}
});
@@ -272,12 +168,11 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
tuner = m_Tuner.get();
}
- m_ClContextControlWrapper = Get();
-
- if (!m_ClContextControlWrapper->IsInitialised())
- {
- m_ClContextControlWrapper->Init(tuner, mlgoTuner, kernelProfiling);
- }
+ m_ClContextControlWrapper = std::make_unique<ClContextControlWrapper>(
+ tuner,
+ mlgoTuner,
+ kernelProfiling
+ );
}
bool ClBackendContext::BeforeLoadNetwork(NetworkId)
diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp
index 276067727b..659d47b7c2 100644
--- a/src/backends/cl/ClBackendContext.hpp
+++ b/src/backends/cl/ClBackendContext.hpp
@@ -31,11 +31,8 @@ public:
private:
std::mutex m_Mutex;
-
struct ClContextControlWrapper;
- static std::shared_ptr<ClBackendContext::ClContextControlWrapper> Get();
-
- std::shared_ptr<ClBackendContext::ClContextControlWrapper> m_ClContextControlWrapper;
+ std::unique_ptr<ClContextControlWrapper> m_ClContextControlWrapper;
std::unordered_set<NetworkId> m_NetworkIds;
diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp
index fd2d0f53eb..34eca961b4 100644
--- a/src/backends/cl/ClContextControl.cpp
+++ b/src/backends/cl/ClContextControl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -166,55 +166,4 @@ void ClContextControl::ClearClCache()
DoLoadOpenClRuntime(true);
}
-armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode,
- armnn::IGpuAccTunedParameters::TuningLevel tuningLevel)
-{
- return new ClTunedParameters(mode, tuningLevel);
-}
-
-armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode,
- armnn::IGpuAccTunedParameters::TuningLevel tuningLevel)
-{
- return IGpuAccTunedParametersPtr(CreateRaw(mode, tuningLevel), &IGpuAccTunedParameters::Destroy);
-}
-
-void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params)
-{
- delete params;
-}
-
-ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode,
- armnn::IGpuAccTunedParameters::TuningLevel tuningLevel)
- : m_Mode(mode)
- , m_TuningLevel(tuningLevel)
- , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters)
-{
-}
-
-void ClTunedParameters::Load(const char* filename)
-{
- try
- {
- m_Tuner.load_from_file(filename);
- }
- catch (const std::exception& e)
- {
- throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " +
- e.what());
- }
-}
-
-void ClTunedParameters::Save(const char* filename) const
-{
- try
- {
- m_Tuner.save_to_file(filename);
- }
- catch (const std::exception& e)
- {
- throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " +
- e.what());
- }
-}
-
} // namespace armnn
diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp
index 4a640cdf22..7520d102a5 100644
--- a/src/backends/cl/ClContextControl.hpp
+++ b/src/backends/cl/ClContextControl.hpp
@@ -1,13 +1,10 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
-#include "armnn/IRuntime.hpp"
-
-#include <arm_compute/runtime/CL/CLTuner.h>
-#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h>
+#include <aclCommon/ArmComputeTuningUtils.hpp>
namespace armnn
{
@@ -42,19 +39,4 @@ private:
bool m_ProfilingEnabled;
};
-class ClTunedParameters : public IGpuAccTunedParameters
-{
-public:
- ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode, armnn::IGpuAccTunedParameters::TuningLevel tuningLevel);
-
- virtual void Load(const char* filename);
- virtual void Save(const char* filename) const;
-
- Mode m_Mode;
- TuningLevel m_TuningLevel;
-
- arm_compute::CLTuner m_Tuner;
- arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle;
-};
-
} // namespace armnn
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index 889a2ad5f3..a03a4e9ea6 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -19,7 +19,7 @@
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>
-#include <cl/IClTensorHandle.hpp>
+#include <aclCommon/IClTensorHandle.hpp>
#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index a61a5bb640..cb2d756037 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -22,6 +22,7 @@
#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClActivationWorkload.hpp"
#include "workloads/ClArgMinMaxWorkload.hpp"
+#include "workloads/ClBatchMatMulWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
#include "workloads/ClBatchToSpaceNdWorkload.hpp"
#include "workloads/ClCastWorkload.hpp"
@@ -201,6 +202,12 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type,
infos[1],
*(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)),
reasonIfUnsupported);
+ case LayerType::BatchMatMul:
+ return IsBatchMatMulSupported(infos[0],
+ infos[1],
+ infos[2],
+ *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::BatchNormalization:
return IsBatchNormalizationSupported(infos[0],
infos[1],
@@ -640,6 +647,20 @@ bool ClLayerSupport::IsArgMinMaxSupported(const TensorInfo& input,
descriptor);
}
+bool ClLayerSupport::IsBatchMatMulSupported(const TensorInfo& inputX,
+ const TensorInfo& inputY,
+ const TensorInfo& output,
+ const BatchMatMulDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchMatMulValidate,
+ reasonIfUnsupported,
+ inputX,
+ inputY,
+ output,
+ descriptor);
+}
+
bool ClLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& mean,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index 27311f74aa..2d784e3df8 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -40,6 +40,12 @@ public:
const ArgMinMaxDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsBatchMatMulSupported(const TensorInfo& inputX,
+ const TensorInfo& inputY,
+ const TensorInfo& output,
+ const BatchMatMulDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+
bool IsBatchNormalizationSupported(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& mean,
diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp
index f63f1faa07..3d750f9059 100644
--- a/src/backends/cl/ClTensorHandle.hpp
+++ b/src/backends/cl/ClTensorHandle.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -18,7 +18,7 @@
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>
-#include <cl/IClTensorHandle.hpp>
+#include <aclCommon/IClTensorHandle.hpp>
namespace armnn
{
diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp
index b8ee57f0bf..82e41d3ff6 100644
--- a/src/backends/cl/ClTensorHandleFactory.cpp
+++ b/src/backends/cl/ClTensorHandleFactory.cpp
@@ -108,12 +108,12 @@ bool ClTensorHandleFactory::SupportsSubTensors() const
MemorySourceFlags ClTensorHandleFactory::GetExportFlags() const
{
- return m_ExportFlags;
+ return MemorySourceFlags(MemorySource::Undefined);
}
MemorySourceFlags ClTensorHandleFactory::GetImportFlags() const
{
- return m_ImportFlags;
+ return MemorySourceFlags(MemorySource::Undefined);
}
-} // namespace armnn
\ No newline at end of file
+} // namespace armnn
diff --git a/src/backends/cl/ClTensorHandleFactory.hpp b/src/backends/cl/ClTensorHandleFactory.hpp
index 3acab0bce7..8e1c7a8a02 100644
--- a/src/backends/cl/ClTensorHandleFactory.hpp
+++ b/src/backends/cl/ClTensorHandleFactory.hpp
@@ -24,8 +24,6 @@ public:
ClTensorHandleFactory(std::shared_ptr<ClMemoryManager> mgr)
: m_MemoryManager(mgr)
- , m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined))
- , m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined))
{}
std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
@@ -56,8 +54,6 @@ public:
private:
mutable std::shared_ptr<ClMemoryManager> m_MemoryManager;
- MemorySourceFlags m_ImportFlags;
- MemorySourceFlags m_ExportFlags;
};
-} // namespace armnn
\ No newline at end of file
+} // namespace armnn
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index d0079abd38..6bf510a2ef 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -265,6 +265,11 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type,
auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
return MakeWorkload<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info, m_CLCompileContext);
}
+ case LayerType::BatchMatMul :
+ {
+ auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
+ return std::make_unique<ClBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info, m_CLCompileContext);
+ }
case LayerType::BatchNormalization :
{
auto batchNormalizationQueueDescriptor
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 6fda16db05..1f97ae7cc8 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -30,6 +30,7 @@ BACKEND_SOURCES := \
workloads/ClActivationWorkload.cpp \
workloads/ClAdditionWorkload.cpp \
workloads/ClArgMinMaxWorkload.cpp \
+ workloads/ClBatchMatMulWorkload.cpp \
workloads/ClBatchNormalizationFloatWorkload.cpp \
workloads/ClBatchToSpaceNdWorkload.cpp \
workloads/ClCastWorkload.cpp \
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
index ec1d0a6c2f..6568d48ce5 100644
--- a/src/backends/cl/test/CMakeLists.txt
+++ b/src/backends/cl/test/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 Arm Ltd. All rights reserved.
+# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -8,6 +8,7 @@ list(APPEND armnnClBackendUnitTests_sources
ClContextControlFixture.hpp
ClContextSerializerTests.cpp
ClCustomAllocatorTests.cpp
+ ClDefaultAllocatorTests.cpp
ClCreateWorkloadTests.cpp
ClEndToEndTests.cpp
ClImportTensorHandleFactoryTests.cpp
@@ -18,7 +19,6 @@ list(APPEND armnnClBackendUnitTests_sources
ClOptimizedNetworkTests.cpp
ClRuntimeTests.cpp
ClWorkloadFactoryHelper.hpp
- DefaultAllocatorTests.cpp
Fp16SupportTest.cpp
ICLTensorProxyTests.cpp
OpenClTimerTest.cpp
diff --git a/src/backends/cl/test/DefaultAllocatorTests.cpp b/src/backends/cl/test/ClDefaultAllocatorTests.cpp
index eaa30c8800..411a480815 100644
--- a/src/backends/cl/test/DefaultAllocatorTests.cpp
+++ b/src/backends/cl/test/ClDefaultAllocatorTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 855697c9be..4ba2a9ec3b 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -73,6 +73,29 @@ ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tanh, ClContextControlFixture, TanhTest)
// Elu Activation
ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Elu, ClContextControlFixture, EluTest)
+// Batch Mat Mul
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DSimpleFloat32,
+ ClContextControlFixture,
+ BatchMatMul2DSimpleTest<DataType::Float32>);
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DSimpleFloat32,
+ ClContextControlFixture,
+ BatchMatMul3DSimpleTest<DataType::Float32>);
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DBatchFloat32,
+ ClContextControlFixture,
+ BatchMatMul3DBatchTest<DataType::Float32>);
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DBroadcastFloat32,
+ ClContextControlFixture,
+ BatchMatMul3DBroadcastTest<DataType::Float32>);
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3D2DBroadcastFloat32,
+ ClContextControlFixture,
+ BatchMatMul3D2DBroadcastTest<DataType::Float32>);
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DTinyFloat32,
+ ClContextControlFixture,
+ BatchMatMul2DTinyTest<DataType::Float32>);
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DTranspSimpleFloat32,
+ ClContextControlFixture,
+ BatchMatMul2DTranspSimpleTest<DataType::Float32>);
+
// Batch To Space
ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchToSpaceNdNhwcFloat321,
ClContextControlFixture,
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index aef7fc7ad2..8616dec078 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -12,6 +12,8 @@ list(APPEND armnnClBackendWorkloads_sources
ClAdditionWorkload.hpp
ClArgMinMaxWorkload.cpp
ClArgMinMaxWorkload.hpp
+ ClBatchMatMulWorkload.cpp
+ ClBatchMatMulWorkload.hpp
ClBatchNormalizationFloatWorkload.cpp
ClBatchNormalizationFloatWorkload.hpp
ClBatchToSpaceNdWorkload.cpp
diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
new file mode 100644
index 0000000000..4acdef5e5c
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp
@@ -0,0 +1,203 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClBatchMatMulWorkload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+#include <armnnUtils/Permute.hpp>
+
+#include <backendsCommon/WorkloadUtils.hpp>
+
+#include <cl/ClTensorHandle.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLGEMM.h>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+
+namespace armnn
+{
+arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX,
+ const TensorInfo& inputY,
+ const TensorInfo& output,
+ const BatchMatMulDescriptor& descriptor)
+{
+ if (descriptor.m_AdjointX || descriptor.m_AdjointY )
+ {
+ throw Exception("Support for adjoint not implemented.");
+ }
+ if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW )
+ {
+        throw Exception("Only MatMul on the last 2 dimensions is supported");
+ }
+
+ arm_compute::Status statusGEMM = arm_compute::Status(arm_compute::ErrorCode::OK);
+ arm_compute::Status statusPermuteX = arm_compute::Status(arm_compute::ErrorCode::OK);
+ arm_compute::Status statusPermuteY = arm_compute::Status(arm_compute::ErrorCode::OK);
+
+ const auto aclInputXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputX, descriptor.m_DataLayoutX);
+ const auto aclInputYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputY, descriptor.m_DataLayoutY);
+ const auto aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo();
+ arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo();
+
+ if (descriptor.m_TransposeX == true)
+ {
+ auto permutationXVector = GeneratePermutationVectorOnLastTwoDimensions(inputX.GetNumDimensions());
+ const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
+ const TensorInfo permutedXInfo = armnnUtils::Permuted(inputX, permutationXVector);
+ aclPermutedXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedXInfo);
+
+ statusPermuteX = arm_compute::CLPermute::validate(&aclInputXInfo,
+ &aclPermutedXInfo,
+ aclPermutationXVector);
+ }
+
+ if ( descriptor.m_TransposeY == true)
+ {
+ auto permutationYVector = GeneratePermutationVectorOnLastTwoDimensions(inputY.GetNumDimensions());
+ const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
+ const TensorInfo permutedYInfo = armnnUtils::Permuted(inputY, permutationYVector);
+ aclPermutedYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedYInfo);
+
+ statusPermuteY = arm_compute::CLPermute::validate(&aclInputYInfo,
+ &aclPermutedYInfo,
+ aclPermutationYVector);
+
+ }
+
+ const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped
+ false, // is inputY reshaped
+ false); // is inputY reshaped only 1st run
+
+
+ statusGEMM = arm_compute::CLGEMM::validate(descriptor.m_TransposeX ? &aclPermutedXInfo : &aclInputXInfo,
+ descriptor.m_TransposeY ? &aclPermutedYInfo : &aclInputYInfo,
+ nullptr,
+ &aclOutputInfo,
+ 1.0,
+ 0,
+ gemm_info);
+
+ if (statusPermuteX.error_code() == arm_compute::ErrorCode::OK &&
+ statusPermuteY.error_code() == arm_compute::ErrorCode::OK &&
+ statusGEMM.error_code() == arm_compute::ErrorCode::OK)
+ {
+ return arm_compute::Status(arm_compute::ErrorCode::OK,
+ "All Batch Mat Mul layers validate status OK.");
+ }
+ else
+ {
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
+ "BatchMatMul layer validate status failed."
+ + statusGEMM.error_description()
+ + statusPermuteX.error_description()
+ + statusPermuteY.error_description());
+ }
+
+}
+
+ClBatchMatMulWorkload::ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
+ : ClBaseWorkload<BatchMatMulQueueDescriptor>(descriptor, info)
+{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchMatMulWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
+ if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
+ {
+ throw Exception("Support for adjoint not implemented.");
+ }
+ if (descriptor.m_Parameters.m_DataLayoutX != armnn::DataLayout::NCHW ||
+ descriptor.m_Parameters.m_DataLayoutY != armnn::DataLayout::NCHW )
+ {
+        throw Exception("Only MatMul on the last 2 dimensions is supported");
+ }
+
+ m_Data.ValidateInputsOutputs("ClBatchMatMulWorkload", 2, 1);
+
+ const arm_compute::ICLTensor& inputX = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ const arm_compute::ICLTensor& inputY = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ inputX.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutX));
+ inputY.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutY));
+
+ arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo();
+ arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo();
+
+ if (descriptor.m_Parameters.m_TransposeX == true)
+ {
+ armnn::PermutationVector permutationXVector
+ = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions());
+ const TensorInfo permutedXInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationXVector);
+ const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
+ armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo);
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorX);
+
+ auto permuteLayerX = std::make_unique<arm_compute::CLPermute>();
+ permuteLayerX->configure(clCompileContext,
+ &inputX,
+ &m_PermutedTensorX,
+ aclPermutationXVector);
+ m_PermuteLayerX.reset(permuteLayerX.release());
+ }
+
+ if (descriptor.m_Parameters.m_TransposeY == true)
+ {
+ armnn::PermutationVector permutationYVector
+                = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[1].GetNumDimensions());
+        const TensorInfo permutedYInfo = armnnUtils::Permuted(info.m_InputTensorInfos[1], permutationYVector);
+ const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
+ armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo);
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorY);
+
+ std::unique_ptr<arm_compute::CLPermute> permuteLayerY(new arm_compute::CLPermute());
+ permuteLayerY->configure(clCompileContext,
+ &inputY,
+ &m_PermutedTensorY,
+ aclPermutationYVector);
+ m_PermuteLayerY.reset(permuteLayerY.release());
+ }
+
+ const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped
+ false, // is inputY reshaped
+ false); // is inputY reshaped only 1st run
+ auto gemmLayer = std::make_unique<arm_compute::CLGEMM>();
+ gemmLayer->configure(clCompileContext,
+ descriptor.m_Parameters.m_TransposeX ? &m_PermutedTensorX : &inputX,
+ descriptor.m_Parameters.m_TransposeY ? &m_PermutedTensorY : &inputY,
+ nullptr,
+ &output,
+ 1.0,
+ 0,
+ gemm_info);
+ m_GEMMLayer.reset(gemmLayer.release());
+}
+
+void ClBatchMatMulWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchMatMulWorkload_Execute", this->GetGuid());
+ if (m_PermuteLayerX)
+ {
+ m_PermuteLayerX->run();
+ }
+ if (m_PermuteLayerY)
+ {
+ m_PermuteLayerY->run();
+ }
+ m_GEMMLayer->run();
+}
+} //namespace armnn
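The workload above decomposes BatchMatMul into optional CLPermute runs (for the transpose flags) followed by a single CLGEMM. A minimal sketch of how this GpuAcc path could be exercised through the public INetwork API follows; it is illustrative only and not part of this patch, the shapes are arbitrary, and the default BatchMatMulDescriptor (no transpose, no adjoint, NCHW data layouts) matches what ClBatchMatMulValidate accepts:

    // Hypothetical helper: builds a 1x2x3 * 1x3x4 batch matrix multiply network.
    armnn::INetworkPtr BuildBatchMatMulNetworkSketch()
    {
        using namespace armnn;

        INetworkPtr net = INetwork::Create();
        BatchMatMulDescriptor desc; // defaults: no transpose/adjoint, NCHW layouts

        IConnectableLayer* inX = net->AddInputLayer(0, "inputX");
        IConnectableLayer* inY = net->AddInputLayer(1, "inputY");
        IConnectableLayer* bmm = net->AddBatchMatMulLayer(desc, "batchMatMul");
        IConnectableLayer* out = net->AddOutputLayer(0, "output");

        TensorInfo xInfo({ 1, 2, 3 }, DataType::Float32, 0.0f, 0, true);
        TensorInfo yInfo({ 1, 3, 4 }, DataType::Float32, 0.0f, 0, true);
        TensorInfo outInfo({ 1, 2, 4 }, DataType::Float32);

        inX->GetOutputSlot(0).SetTensorInfo(xInfo);
        inY->GetOutputSlot(0).SetTensorInfo(yInfo);
        bmm->GetOutputSlot(0).SetTensorInfo(outInfo);

        inX->GetOutputSlot(0).Connect(bmm->GetInputSlot(0));
        inY->GetOutputSlot(0).Connect(bmm->GetInputSlot(1));
        bmm->GetOutputSlot(0).Connect(out->GetInputSlot(0));

        return net; // optimise for Compute::GpuAcc and load into an IRuntime as usual
    }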
diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp
new file mode 100644
index 0000000000..5277efc947
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseWorkload.hpp"
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <memory>
+
+namespace armnn
+{
+ arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX,
+ const TensorInfo& inputY,
+ const TensorInfo& output,
+ const BatchMatMulDescriptor& descriptor);
+
+ class ClBatchMatMulWorkload : public ClBaseWorkload<BatchMatMulQueueDescriptor>
+ {
+ public:
+ ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
+ virtual void Execute() const override;
+
+ private:
+ // ACL layers required to fully form a Batch Mat Mul layer.
+ std::unique_ptr<arm_compute::IFunction> m_GEMMLayer;
+ std::unique_ptr<arm_compute::IFunction> m_PermuteLayerX;
+ std::unique_ptr<arm_compute::IFunction> m_PermuteLayerY;
+
+ // Additional CL arm_compute::Tensors.
+ // Required to perform permutations.
+ arm_compute::CLTensor m_PermutedTensorX;
+ arm_compute::CLTensor m_PermutedTensorY;
+
+ };
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index c3a79b7583..44f3798d7d 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -10,6 +10,7 @@
#include "ClArgMinMaxWorkload.hpp"
#include "ClComparisonWorkload.hpp"
#include "ClConstantWorkload.hpp"
+#include "ClBatchMatMulWorkload.hpp"
#include "ClBatchNormalizationFloatWorkload.hpp"
#include "ClBatchToSpaceNdWorkload.hpp"
#include "ClCastWorkload.hpp"
diff --git a/src/backends/dynamic/reference/CMakeLists.txt b/src/backends/dynamic/reference/CMakeLists.txt
index de46f7a5cb..fe875282f5 100644
--- a/src/backends/dynamic/reference/CMakeLists.txt
+++ b/src/backends/dynamic/reference/CMakeLists.txt
@@ -1,9 +1,9 @@
#
-# Copyright © 2017 Arm Ltd. All rights reserved.
+# Copyright © 2017, 2023 Arm Ltd. All rights reserved.
# SPDX-License-Identifier: MIT
#
-if(NOT BUILD_BARE_METAL)
+if((NOT BUILD_BARE_METAL) AND (NOT EXECUTE_NETWORK_STATIC))
# File needed to wrap the existing backend into a dynamic one
list(APPEND armnnRefDynamicBackend_sources
@@ -33,5 +33,5 @@ target_include_directories(Arm_CpuRef_backend PRIVATE ${PROJECT_SOURCE_DIR}/prof
set_target_properties(Arm_CpuRef_backend PROPERTIES PREFIX "")
target_link_libraries(Arm_CpuRef_backend armnn)
-# BUILD_BARE_METAL
+# BUILD_BARE_METAL && EXECUTE_NETWORK_STATIC
endif()
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 4c97855668..ee155a2c64 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -90,6 +90,19 @@ namespace armnn
namespace
{
+const TensorInfo OverrideDataType(const TensorInfo& info, Optional<DataType> type)
+{
+ if (!type)
+ {
+ return info;
+ }
+ return TensorInfo(info.GetShape(),
+ type.value(),
+ info.GetQuantizationScale(),
+ info.GetQuantizationOffset(),
+ info.IsConstant());
+}
+
template< typename ... Args>
bool IsNeonBackendSupported(Optional<std::string&> reasonIfUnsupported, Args... args)
{
@@ -151,61 +164,64 @@ NeonLayerSupport::NeonLayerSupport()
{
}
-bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
- const std::vector<TensorInfo>& infos,
- const BaseDescriptor& descriptor,
- const Optional<LstmInputParamsInfo>& lstmParamsInfo,
- const Optional<QuantizedLstmInputParamsInfo>& quantizedLstmParamsInfo,
- Optional<std::string&> reasonIfUnsupported) const
+bool IsLayerTypeSupported(const LayerType& type,
+ const std::vector<TensorInfo>& infos,
+ const BaseDescriptor& descriptor,
+ const Optional<LstmInputParamsInfo>& lstmParamsInfo,
+ const Optional<QuantizedLstmInputParamsInfo>& quantizedLstmParamsInfo,
+ Optional<std::string&> reasonIfUnsupported,
+ const NeonLayerSupport& support)
{
switch (type)
{
case LayerType::Activation:
- return IsActivationSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const ActivationDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsActivationSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const ActivationDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Addition:
- return IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ return support.IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
case LayerType::ArgMinMax:
- return IsArgMinMaxSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsArgMinMaxSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::BatchMatMul:
- return IsBatchMatMulSupported(infos[0],
- infos[1],
- infos[2],
- *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsBatchMatMulSupported(infos[0],
+ infos[1],
+ infos[2],
+ *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::BatchNormalization:
- return IsBatchNormalizationSupported(infos[0],
- infos[1],
- infos[2],
- infos[3],
- infos[4],
- infos[5],
- *(PolymorphicDowncast<const BatchNormalizationDescriptor*>
- (&descriptor)),
- reasonIfUnsupported);
+ return support.IsBatchNormalizationSupported(infos[0],
+ infos[1],
+ infos[2],
+ infos[3],
+ infos[4],
+ infos[5],
+ *(PolymorphicDowncast<const
+ BatchNormalizationDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::BatchToSpaceNd:
- return IsBatchToSpaceNdSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const BatchToSpaceNdDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsBatchToSpaceNdSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const
+ BatchToSpaceNdDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Cast:
- return IsCastSupported(infos[0], infos[1], reasonIfUnsupported);
+ return support.IsCastSupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::ChannelShuffle:
- return IsChannelShuffleSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const ChannelShuffleDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsChannelShuffleSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const
+ ChannelShuffleDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Comparison:
- return IsComparisonSupported(infos[0],
- infos[1],
- infos[2],
- *(PolymorphicDowncast<const ComparisonDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsComparisonSupported(infos[0],
+ infos[1],
+ infos[2],
+ *(PolymorphicDowncast<const ComparisonDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Concat:
{
std::vector<const TensorInfo*> inputInfos;
@@ -213,17 +229,17 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
{
inputInfos.push_back(&infos[i]);
}
- return IsConcatSupported(inputInfos,
- infos[infos.size() - 1],
- *(PolymorphicDowncast<const OriginsDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsConcatSupported(inputInfos,
+ infos[infos.size() - 1],
+ *(PolymorphicDowncast<const OriginsDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
}
case LayerType::Constant:
- return IsConstantSupported(infos[0], reasonIfUnsupported);
+ return support.IsConstantSupported(infos[0], reasonIfUnsupported);
case LayerType::ConvertFp16ToFp32:
- return IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported);
+ return support.IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::ConvertFp32ToFp16:
- return IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported);
+ return support.IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::Convolution2d:
{
if (infos.size() != 4)
@@ -235,21 +251,21 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
auto desc = *(PolymorphicDowncast<const Convolution2dDescriptor*>(&descriptor));
if (infos[3] == TensorInfo())
{
- return IsConvolution2dSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- EmptyOptional(),
- reasonIfUnsupported);
+ return support.IsConvolution2dSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ EmptyOptional(),
+ reasonIfUnsupported);
}
else
{
- return IsConvolution2dSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- infos[3],
- reasonIfUnsupported);
+ return support.IsConvolution2dSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ infos[3],
+ reasonIfUnsupported);
}
}
case LayerType::Convolution3d:
@@ -263,28 +279,28 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
auto desc = *(PolymorphicDowncast<const Convolution3dDescriptor*>(&descriptor));
if (infos[3] == TensorInfo())
{
- return IsConvolution3dSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- EmptyOptional(),
- reasonIfUnsupported);
+ return support.IsConvolution3dSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ EmptyOptional(),
+ reasonIfUnsupported);
}
else
{
- return IsConvolution3dSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- infos[3],
- reasonIfUnsupported);
+ return support.IsConvolution3dSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ infos[3],
+ reasonIfUnsupported);
}
}
case LayerType::DepthToSpace:
- return IsDepthToSpaceSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const DepthToSpaceDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsDepthToSpaceSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const DepthToSpaceDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::DepthwiseConvolution2d:
{
if (infos.size() != 4)
@@ -296,217 +312,223 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
auto desc = *(PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&descriptor));
if (infos[3] == TensorInfo())
{
- return IsDepthwiseConvolutionSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- EmptyOptional(),
- reasonIfUnsupported);
+ return support.IsDepthwiseConvolutionSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ EmptyOptional(),
+ reasonIfUnsupported);
}
else
{
- return IsDepthwiseConvolutionSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- infos[3],
- reasonIfUnsupported);
+ return support.IsDepthwiseConvolutionSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ infos[3],
+ reasonIfUnsupported);
}
}
case LayerType::Dequantize:
- return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
+ return support.IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::DetectionPostProcess:
{
auto desc = *(PolymorphicDowncast<const DetectionPostProcessDescriptor*>(&descriptor));
- return LayerSupportBase::IsDetectionPostProcessSupported(infos[0],
- infos[1],
- infos[2],
- infos[3],
- infos[4],
- infos[5],
- infos[6],
- desc,
- reasonIfUnsupported);
+ return support.IsDetectionPostProcessSupported(infos[0],
+ infos[1],
+ infos[2],
+ infos[3],
+ infos[4],
+ infos[5],
+ infos[6],
+ desc,
+ reasonIfUnsupported);
}
case LayerType::Division:
- return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
case LayerType::ElementwiseUnary:
- return IsElementwiseUnarySupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const ElementwiseUnaryDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsElementwiseUnarySupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const
+ ElementwiseUnaryDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Fill:
- return IsFillSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const FillDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsFillSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const FillDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Floor:
- return IsFloorSupported(infos[0], infos[1], reasonIfUnsupported);
+ return support.IsFloorSupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::FullyConnected:
- return IsFullyConnectedSupported(infos[0],
+ return support.IsFullyConnectedSupported(infos[0],
+ infos[1],
+ infos[2],
+ infos[3],
+ *(PolymorphicDowncast<const
+ FullyConnectedDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
+ case LayerType::Gather:
+ return support.IsGatherSupported(infos[0],
infos[1],
infos[2],
- infos[3],
- *(PolymorphicDowncast<const FullyConnectedDescriptor*>(&descriptor)),
+ *(PolymorphicDowncast<const GatherDescriptor*>(&descriptor)),
reasonIfUnsupported);
- case LayerType::Gather:
- return IsGatherSupported(infos[0],
- infos[1],
- infos[2],
- *(PolymorphicDowncast<const GatherDescriptor*>(&descriptor)),
- reasonIfUnsupported);
case LayerType::GatherNd:
- return IsGatherNdSupported(infos[0],
- infos[1],
- infos[2],
- reasonIfUnsupported);
+ return support.IsGatherNdSupported(infos[0],
+ infos[1],
+ infos[2],
+ reasonIfUnsupported);
case LayerType::Input:
- return IsInputSupported(infos[0], reasonIfUnsupported);
+ return support.IsInputSupported(infos[0], reasonIfUnsupported);
case LayerType::InstanceNormalization:
- return IsInstanceNormalizationSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const InstanceNormalizationDescriptor*>
- (&descriptor)),
- reasonIfUnsupported);
+ return support.IsInstanceNormalizationSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const
+ InstanceNormalizationDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::L2Normalization:
- return IsL2NormalizationSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const L2NormalizationDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsL2NormalizationSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const
+ L2NormalizationDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::LogicalBinary:
- return IsLogicalBinarySupported(infos[0],
- infos[1],
- infos[2],
- *(PolymorphicDowncast<const LogicalBinaryDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsLogicalBinarySupported(infos[0],
+ infos[1],
+ infos[2],
+ *(PolymorphicDowncast<const
+ LogicalBinaryDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::LogSoftmax:
- return IsLogSoftmaxSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const LogSoftmaxDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsLogSoftmaxSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const LogSoftmaxDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Lstm:
- return IsLstmSupported(infos[0],
- infos[1],
- infos[2],
- infos[3],
- infos[4],
- infos[5],
- infos[6],
- *(PolymorphicDowncast<const LstmDescriptor*>(&descriptor)),
- lstmParamsInfo.value(),
- reasonIfUnsupported);
+ return support.IsLstmSupported(infos[0],
+ infos[1],
+ infos[2],
+ infos[3],
+ infos[4],
+ infos[5],
+ infos[6],
+ *(PolymorphicDowncast<const LstmDescriptor*>(&descriptor)),
+ lstmParamsInfo.value(),
+ reasonIfUnsupported);
case LayerType::Map:
return true;
case LayerType::Maximum:
- return IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ return support.IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
case LayerType::Mean:
- return IsMeanSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const MeanDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsMeanSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const MeanDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::MemCopy:
- return LayerSupportBase::IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported);
+ return support.IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::MemImport:
- return LayerSupportBase::IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported);
+ return support.IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::Merge:
- return LayerSupportBase::IsMergeSupported(infos[0],
+ return support.IsMergeSupported(infos[0],
infos[1],
infos[2],
reasonIfUnsupported);
case LayerType::Minimum:
- return IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ return support.IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
case LayerType::Multiplication:
- return IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ return support.IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
case LayerType::Normalization:
- return IsNormalizationSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const NormalizationDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsNormalizationSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const
+ NormalizationDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Output:
- return IsOutputSupported(infos[0], reasonIfUnsupported);
+ return support.IsOutputSupported(infos[0], reasonIfUnsupported);
case LayerType::Pad:
- return IsPadSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const PadDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsPadSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const PadDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Permute:
- return IsPermuteSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const PermuteDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsPermuteSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const PermuteDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Pooling2d:
- return IsPooling2dSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsPooling2dSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Pooling3d:
- return IsPooling3dSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsPooling3dSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Prelu:
- return IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ return support.IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
case LayerType::QLstm:
- return IsQLstmSupported(infos[0],
- infos[1],
- infos[2],
- infos[3],
- infos[4],
- infos[5],
- *(PolymorphicDowncast<const QLstmDescriptor*>(&descriptor)),
- lstmParamsInfo.value(),
- reasonIfUnsupported);
- case LayerType::Quantize:
- return IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported);
- case LayerType::QuantizedLstm:
- return IsQuantizedLstmSupported(infos[0],
+ return support.IsQLstmSupported(infos[0],
infos[1],
infos[2],
infos[3],
infos[4],
- quantizedLstmParamsInfo.value(),
+ infos[5],
+ *(PolymorphicDowncast<const QLstmDescriptor*>(&descriptor)),
+ lstmParamsInfo.value(),
reasonIfUnsupported);
+ case LayerType::Quantize:
+ return support.IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported);
+ case LayerType::QuantizedLstm:
+ return support.IsQuantizedLstmSupported(infos[0],
+ infos[1],
+ infos[2],
+ infos[3],
+ infos[4],
+ quantizedLstmParamsInfo.value(),
+ reasonIfUnsupported);
case LayerType::Rank:
return true;
case LayerType::Reshape:
- return IsReshapeSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const ReshapeDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsReshapeSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const ReshapeDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Resize:
- return IsResizeSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsResizeSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Reduce:
- return IsReduceSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsReduceSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Shape:
- return LayerSupportBase::IsShapeSupported(infos[0],
- infos[1],
- reasonIfUnsupported);
+ return support.IsShapeSupported(infos[0],
+ infos[1],
+ reasonIfUnsupported);
case LayerType::Slice:
- return IsSliceSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const SliceDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsSliceSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const SliceDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Softmax:
- return IsSoftmaxSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsSoftmaxSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::SpaceToBatchNd:
- return IsSpaceToBatchNdSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const SpaceToBatchNdDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsSpaceToBatchNdSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const
+ SpaceToBatchNdDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::SpaceToDepth:
- return IsSpaceToDepthSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const SpaceToDepthDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsSpaceToDepthSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const SpaceToDepthDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Splitter:
{
std::vector<TensorInfo> outputInfos;
@@ -514,10 +536,10 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
{
outputInfos.push_back(infos[i]);
}
- return IsSplitterSupported(infos[0],
- {outputInfos.begin(), outputInfos.end()},
- *(PolymorphicDowncast<const ViewsDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsSplitterSupported(infos[0],
+ {outputInfos.begin(), outputInfos.end()},
+ *(PolymorphicDowncast<const ViewsDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
}
case LayerType::Stack:
{
@@ -526,23 +548,23 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
{
inputInfos.push_back(&infos[i]);
}
- return IsStackSupported(inputInfos,
- infos[infos.size() - 1],
- *(PolymorphicDowncast<const StackDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsStackSupported(inputInfos,
+ infos[infos.size() - 1],
+ *(PolymorphicDowncast<const StackDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
}
case LayerType::StridedSlice:
- return IsStridedSliceSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const StridedSliceDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsStridedSliceSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const StridedSliceDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::Subtraction:
- return IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ return support.IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
case LayerType::Transpose:
- return IsTransposeSupported(infos[0],
- infos[1],
- *(PolymorphicDowncast<const TransposeDescriptor*>(&descriptor)),
- reasonIfUnsupported);
+ return support.IsTransposeSupported(infos[0],
+ infos[1],
+ *(PolymorphicDowncast<const TransposeDescriptor*>(&descriptor)),
+ reasonIfUnsupported);
case LayerType::TransposeConvolution2d:
{
if (infos.size() != 4)
@@ -554,34 +576,36 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
auto desc = *(PolymorphicDowncast<const TransposeConvolution2dDescriptor*>(&descriptor));
if (infos[3] == TensorInfo())
{
- return IsTransposeConvolution2dSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- EmptyOptional(),
- reasonIfUnsupported);
+ return support.IsTransposeConvolution2dSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ EmptyOptional(),
+ reasonIfUnsupported);
}
else
{
- return IsTransposeConvolution2dSupported(infos[0],
- infos[1],
- desc,
- infos[2],
- infos[3],
- reasonIfUnsupported);
+ return support.IsTransposeConvolution2dSupported(infos[0],
+ infos[1],
+ desc,
+ infos[2],
+ infos[3],
+ reasonIfUnsupported);
}
}
case LayerType::UnidirectionalSequenceLstm:
- return IsUnidirectionalSequenceLstmSupported(infos[0],
- infos[1],
- infos[2],
- infos[3],
- infos[4],
- infos[5],
- *(PolymorphicDowncast<const
- UnidirectionalSequenceLstmDescriptor*>(&descriptor)),
- lstmParamsInfo.value(),
- reasonIfUnsupported);
+ {
+ auto desc = *(PolymorphicDowncast<const UnidirectionalSequenceLstmDescriptor*>(&descriptor));
+ return support.IsUnidirectionalSequenceLstmSupported(infos[0],
+ infos[1],
+ infos[2],
+ infos[3],
+ infos[4],
+ infos[5],
+ desc,
+ lstmParamsInfo.value(),
+ reasonIfUnsupported);
+ }
case LayerType::Unmap:
return true;
default:
@@ -592,6 +616,54 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
}
}
+bool NeonLayerSupport::IsLayerSupported(const LayerType& type,
+ const std::vector<TensorInfo>& infos,
+ const BaseDescriptor& descriptor,
+ const Optional<LstmInputParamsInfo>& lstmParamsInfo,
+ const Optional<QuantizedLstmInputParamsInfo>& quantizedLstmParamsInfo,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ bool isSupported = IsLayerTypeSupported(type,
+ infos,
+ descriptor,
+ lstmParamsInfo,
+ quantizedLstmParamsInfo,
+ reasonIfUnsupported,
+ *this);
+
+    // For android-nn-driver and the support library, running FP16 operations on CpuAcc requires at least a
+    // v8.2 architecture. If the available architecture is older than v8.2, we can check whether the operator
+    // is supported by changing the operator inputs & outputs to FP32.
+    // This does not change the operator datatype in the above parsers to FP32. We are simply reporting to the
+    // parsers whether the operator can be supported in ArmNN. We will then re-enter ArmNN (Network.cpp), where
+    // we will recheck IsLayerSupported() on the FP16 datatype, update the operator to FP32, and insert
+    // convert layers around the FP32 operator.
+ if (reasonIfUnsupported.has_value())
+ {
+ std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
+ if (!isSupported
+ && reasonIfUnsupported.value().find(checkStr) != std::string::npos)
+ {
+ std::vector<TensorInfo> newInfos;
+ for (auto info: infos)
+ {
+ newInfos.emplace_back(OverrideDataType(info, DataType::Float32));
+ }
+
+ std::string tmpString;
+ return IsLayerTypeSupported(type,
+ newInfos,
+ descriptor,
+ lstmParamsInfo,
+ quantizedLstmParamsInfo,
+ tmpString,
+ *this);
+ }
+ }
+
+ return isSupported;
+}
+
bool NeonLayerSupport::IsActivationSupported(const TensorInfo& input,
const TensorInfo& output,
const ActivationDescriptor& descriptor,
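
Note on the FP16 fallback added above: it only re-queries support, it never rewrites the graph itself, and the retry uses a throwaway reason string so the original FP16 failure message is preserved for the caller. Below is a minimal, self-contained sketch of that retry pattern in plain C++ (not the ArmNN API); the DataType enum, IsTypeSupportedStub and IsSupportedWithFp32Fallback are hypothetical stand-ins for IsLayerTypeSupported and the OverrideDataType loop above.

#include <iostream>
#include <string>
#include <vector>

enum class DataType { Float16, Float32 };

// Stand-in for IsLayerTypeSupported(): rejects FP16 with the architecture message.
bool IsTypeSupportedStub(const std::vector<DataType>& infos, std::string& reason)
{
    for (DataType d : infos)
    {
        if (d == DataType::Float16)
        {
            reason = "This CPU architecture does not support F16 data type, you need v8.2 or above";
            return false;
        }
    }
    return true;
}

bool IsSupportedWithFp32Fallback(std::vector<DataType> infos, std::string& reason)
{
    bool supported = IsTypeSupportedStub(infos, reason);
    if (!supported && reason.find("you need v8.2 or above") != std::string::npos)
    {
        // Re-check with every tensor overridden to FP32, mirroring the loop in the new overload above.
        for (DataType& d : infos)
        {
            d = DataType::Float32;
        }
        std::string ignored;   // scratch string, so the FP16 reason is not overwritten
        return IsTypeSupportedStub(infos, ignored);
    }
    return supported;
}

int main()
{
    std::string reason;
    std::cout << std::boolalpha
              << IsSupportedWithFp32Fallback({ DataType::Float16, DataType::Float16 }, reason) << "\n";
}
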
diff --git a/src/backends/tosaCommon/TosaMappings.cpp b/src/backends/tosaCommon/TosaMappings.cpp
index 1452e4aefd..0b5fa1a158 100644
--- a/src/backends/tosaCommon/TosaMappings.cpp
+++ b/src/backends/tosaCommon/TosaMappings.cpp
@@ -24,8 +24,10 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer,
switch (type)
{
case LayerType::Addition:
+ case LayerType::Multiplication:
+ case LayerType::Subtraction:
{
- return ConvertAdditionToTosaOperator(layer, inputs, outputs);
+ return ConvertElementwiseBinaryToTosaOperator(layer, type, inputs, outputs);
}
case LayerType::Concat:
{
@@ -77,6 +79,11 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer,
auto transposeConv2dDesc = PolymorphicDowncast<const TransposeConvolution2dDescriptor*>(&descriptor);
return ConvertTransposeConv2dToTosaOperator(layer, inputs, outputs, transposeConv2dDesc);
}
+ case LayerType::Transpose:
+ {
+ auto transposeDesc = PolymorphicDowncast<const TransposeDescriptor*>(&descriptor);
+ return ConvertTransposeToTosaOperator(layer, inputs, outputs, transposeDesc);
+ }
default:
{
return CreateEmptyTosaSerializationBasicBlock();
diff --git a/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp b/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp
deleted file mode 100644
index 5eb7441531..0000000000
--- a/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "TosaOperatorUtils.hpp"
-
-#include <Layer.hpp>
-
-#include <tosa_serialization_handler.h>
-
-using namespace armnn;
-using namespace tosa;
-
-TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer,
- const std::vector<const TensorInfo*>& inputs,
- const std::vector<const TensorInfo*>& outputs);
-
diff --git a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt
index 2443dc0585..2ec052cd43 100644
--- a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt
+++ b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt
@@ -4,8 +4,6 @@
#
list(APPEND armnnTosaBackendOperators_sources
- AdditionOperator.hpp
- AdditionOperator.cpp
AvgPool2DIgnoreValueOperator.hpp
AvgPool2DIgnoreValueOperator.cpp
ConcatOperator.hpp
@@ -14,6 +12,8 @@ list(APPEND armnnTosaBackendOperators_sources
ConstantOperator.cpp
Conv2dOperator.hpp
Conv2dOperator.cpp
+ ElementwiseBinaryOperator.hpp
+ ElementwiseBinaryOperator.cpp
Pooling2DOperator.hpp
Pooling2DOperator.cpp
ReshapeOperator.hpp
@@ -23,6 +23,8 @@ list(APPEND armnnTosaBackendOperators_sources
TosaOperatorUtils.hpp
TransposeConv2dOperator.hpp
TransposeConv2dOperator.cpp
+ TransposeOperator.hpp
+ TransposeOperator.cpp
)
add_library(armnnTosaBackendOperators OBJECT ${armnnTosaBackendOperators_sources})
diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
new file mode 100644
index 0000000000..9909e66a7d
--- /dev/null
+++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
@@ -0,0 +1,103 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ElementwiseBinaryOperator.hpp"
+
+TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer,
+ const LayerType type,
+ const std::vector<const TensorInfo*>& inputs,
+ const std::vector<const TensorInfo*>& outputs)
+{
+ std::string input0Name = std::string("input0_");
+ std::string input1Name = std::string("input1_");
+ std::string outputName = std::string("output0_");
+ std::string blockName;
+
+    // If a layer is present then the block will be used for execution, so input and output names need to be determined
+    // using the previous and following layers so that the graph is connected correctly. For validation this doesn't matter.
+ if(layer != nullptr)
+ {
+ // Get the layers connected to the input slots and determine unique tensor names.
+ Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
+ input0Name = GenerateUniqueName(connectedLayer0, 0);
+
+ Layer& connectedLayer1 = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
+ input1Name = GenerateUniqueName(connectedLayer1, 1);
+
+ // Determine unique output tensor name.
+ outputName = GenerateUniqueOutputName(*layer, 0);
+ }
+
+ TosaSerializationOperator* op = nullptr;
+ switch(type)
+ {
+ case LayerType::Addition:
+ {
+ op = new TosaSerializationOperator(Op_ADD,
+ Attribute_NONE,
+ nullptr,
+ {input0Name, input1Name},
+ {outputName});
+ blockName = std::string("Op_ADD_block_") + GetUniqueTosaMappingID();
+ break;
+ }
+ case LayerType::Multiplication:
+ {
+ int32_t shift = 0;
+ TosaMulAttribute mulAttribute(shift);
+ op = new TosaSerializationOperator(Op_MUL,
+ Attribute_MulAttribute,
+ &mulAttribute,
+ {input0Name, input1Name},
+ {outputName});
+ blockName = std::string("Op_MUL_block_") + GetUniqueTosaMappingID();
+ break;
+ }
+ case LayerType::Subtraction:
+ {
+ op = new TosaSerializationOperator(Op_SUB,
+ Attribute_NONE,
+ nullptr,
+ {input0Name, input1Name},
+ {outputName});
+ blockName = std::string("Op_SUB_block_") + GetUniqueTosaMappingID();
+ break;
+ }
+ default:
+ throw armnn::Exception("ConvertElementwiseBinaryToTosaOperator: Unsupported layer type.");
+ }
+ ARMNN_ASSERT(op != nullptr);
+
+ std::vector<TosaSerializationTensor*> tensors;
+    // Only add input tensors if the connected layer is an input layer,
+    // as intermediate or constant tensors will be created separately.
+    // There also can't be duplicate tensors.
+ if(input0Name.find("input0_") != std::string::npos)
+ {
+ std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
+ DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
+ tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {}));
+ }
+ if(input1Name.find("input1_") != std::string::npos)
+ {
+ std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
+ DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
+ tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {}));
+ }
+
+ std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
+ DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
+
+ tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+
+    // operatorInputNames/operatorOutputNames end up being the same as
+    // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings.
+ return new TosaSerializationBasicBlock(blockName, // name
+ {op}, // operators
+ tensors, // tensors
+ {input0Name, input1Name}, // inputs
+ {outputName}); // outputs
+}
+
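
As a rough illustration of the validation path (layer == nullptr) taken by the converter above, the hypothetical driver below assumes the tosaCommon headers and the TOSA serialization library are on the include path, and that TosaSerializationBasicBlock exposes the GetName()/GetOperators()/GetTensors() accessors used by the mapping tests further down; the shapes match those tests.

#include "ElementwiseBinaryOperator.hpp"

#include <iostream>

int main()
{
    // With no Layer pointer the converter takes the validation path, so it keeps the default
    // "input0_"/"input1_"/"output0_" tensor names and adds both input tensors to the block.
    armnn::TensorInfo in0({ 1, 2, 4, 2 }, armnn::DataType::Float32);
    armnn::TensorInfo in1({ 1, 2, 4, 2 }, armnn::DataType::Float32);
    armnn::TensorInfo out({ 1, 2, 4, 2 }, armnn::DataType::Float32);

    TosaSerializationBasicBlock* block = ConvertElementwiseBinaryToTosaOperator(
        nullptr, armnn::LayerType::Subtraction, { &in0, &in1 }, { &out });

    // Expect one Op_SUB operator and three tensors (two inputs plus one output).
    std::cout << block->GetName() << ": "
              << block->GetOperators().size() << " operator(s), "
              << block->GetTensors().size() << " tensor(s)" << std::endl;
    return 0;
}
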
diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp
new file mode 100644
index 0000000000..86031c6e06
--- /dev/null
+++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TosaOperatorUtils.hpp"
+
+#include <Layer.hpp>
+
+#include <tosa_serialization_handler.h>
+
+using namespace armnn;
+using namespace tosa;
+
+TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer,
+ const LayerType type,
+ const std::vector<const TensorInfo*>& inputs,
+                                                                     const std::vector<const TensorInfo*>& outputs);
\ No newline at end of file
diff --git a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp
index 052c54c3af..3f27371295 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp
@@ -5,12 +5,13 @@
#pragma once
-#include "AdditionOperator.hpp"
+#include "AvgPool2DIgnoreValueOperator.hpp"
#include "ConcatOperator.hpp"
#include "ConstantOperator.hpp"
#include "Conv2dOperator.hpp"
-#include "AvgPool2DIgnoreValueOperator.hpp"
+#include "ElementwiseBinaryOperator.hpp"
#include "Pooling2DOperator.hpp"
#include "ReshapeOperator.hpp"
#include "SliceOperator.hpp"
-#include "TransposeConv2dOperator.hpp"
\ No newline at end of file
+#include "TransposeConv2dOperator.hpp"
+#include "TransposeOperator.hpp"
diff --git a/src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp
index 7014886d92..56178e428b 100644
--- a/src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp
@@ -3,36 +3,37 @@
// SPDX-License-Identifier: MIT
//
-#include "AdditionOperator.hpp"
+#include "TransposeOperator.hpp"
-TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer,
- const std::vector<const TensorInfo*>& inputs,
- const std::vector<const TensorInfo*>& outputs)
+TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer,
+ const std::vector<const TensorInfo*>& inputs,
+ const std::vector<const TensorInfo*>& outputs,
+ const TransposeDescriptor* transposeDescriptor)
{
std::string input0Name = std::string("input0_");
- std::string input1Name = std::string("input1_");
std::string outputName = std::string("output0_");
- std::string blockName = std::string("Op_ADD_block_") + GetUniqueTosaMappingID();
+ std::string blockName = std::string("Op_TRANSPOSE_block_") + GetUniqueTosaMappingID();
// If a layer is present then the block will be used for execution, so input and output names need to be determined
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensors names.
+        // Get the layer connected to the input slot and determine a unique tensor name.
Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
input0Name = GenerateUniqueName(connectedLayer0, 0);
- Layer& connectedLayer1 = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
- input1Name = GenerateUniqueName(connectedLayer1, 1);
-
// Determine unique output tensor name.
outputName = GenerateUniqueOutputName(*layer, 0);
}
- auto* op = new TosaSerializationOperator(Op_ADD,
- Attribute_NONE,
- nullptr,
- {input0Name, input1Name},
+ std::vector<int32_t> mappings(transposeDescriptor->m_DimMappings.begin(),
+ transposeDescriptor->m_DimMappings.end());
+ TosaTransposeAttribute attribute(mappings);
+
+ auto* op = new TosaSerializationOperator(Op_TRANSPOSE,
+ Attribute_TransposeAttribute,
+ &attribute,
+ {input0Name},
{outputName});
@@ -49,14 +50,6 @@ TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer,
tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {}));
}
- if(input1Name.find("input1_") != std::string::npos)
- {
- std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
- DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
-
- tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {}));
- }
-
std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
@@ -67,6 +60,6 @@ TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer,
return new TosaSerializationBasicBlock(blockName, // name
{op}, // operators
tensors, // tensors
- {input0Name, input1Name}, // inputs
+ {input0Name}, // inputs
{outputName}); // outputs
}
\ No newline at end of file
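
The dim mappings on the TransposeDescriptor are copied verbatim into the TOSA perms attribute, so output dimension i takes input dimension perms[i]. A toy, standard-C++-only sketch of that shape arithmetic, using the shapes from the transpose tests further down:

#include <array>
#include <cstdint>
#include <iostream>

int main()
{
    std::array<int32_t, 4> inputShape { 1, 1, 5, 3 };
    std::array<int32_t, 4> perms      { 0, 2, 1, 3 };   // same values as the test's TransposeDescriptor

    std::array<int32_t, 4> outputShape {};
    for (std::size_t i = 0; i < perms.size(); ++i)
    {
        // Output dimension i takes input dimension perms[i].
        outputShape[i] = inputShape[static_cast<std::size_t>(perms[i])];
    }

    for (int32_t d : outputShape)
    {
        std::cout << d << ' ';   // prints: 1 5 1 3
    }
    std::cout << '\n';
}
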
diff --git a/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp b/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp
new file mode 100644
index 0000000000..3d1e2acd14
--- /dev/null
+++ b/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TosaOperatorUtils.hpp"
+
+#include <Layer.hpp>
+
+#include <tosa_serialization_handler.h>
+
+using namespace armnn;
+using namespace tosa;
+
+TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer,
+ const std::vector<const TensorInfo*>& inputs,
+ const std::vector<const TensorInfo*>& outputs,
+ const TransposeDescriptor* transposeDescriptor);
diff --git a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp
index b3ab14a774..4cc37918e5 100644
--- a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp
+++ b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp
@@ -253,6 +253,54 @@ TEST_CASE("GetTosaMappingFromLayer_Conv2dLayer")
basicBlock, inputShape, outputShape, Op_CONV2D, Attribute_ConvAttribute, descriptor, LayerType::Convolution2d);
}
+TEST_CASE("GetTosaMapping_MultiplicationLayer")
+{
+
+ const TensorInfo input0Info ({ 1, 2, 4, 2 }, DataType::Float32);
+ const TensorInfo input1Info ({ 1, 2, 4, 2 }, DataType::Float32);
+ const TensorInfo outputInfo ({ 1, 2, 4, 2 }, DataType::Float32);
+
+ std::vector<std::vector<int32_t>> inputShape = {{ 1, 2, 4, 2 }, { 1, 2, 4, 2 }};
+ std::vector<std::vector<int32_t>> outputShape = {{ 1, 2, 4, 2 }};
+
+ TosaSerializationBasicBlock* basicBlock =
+ GetTosaMapping(nullptr, LayerType::Multiplication, {&input0Info, &input1Info}, {&outputInfo}, BaseDescriptor());
+ AssertTosaOneToOneMappingBasicBlock( basicBlock, inputShape, outputShape,
+ tosa::Op_MUL, tosa::Attribute_MulAttribute, BaseDescriptor(), LayerType::Multiplication);
+}
+
+TEST_CASE("GetTosaMappingFromLayer_MultiplicationLayer")
+{
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* add = net->AddMultiplicationLayer("multiplication");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 2, 2 }, DataType::Float32, 0.0f, 0, true);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<std::vector<int32_t>> inputShape = {{ 2, 2 }, { 2, 2 }};
+ std::vector<std::vector<int32_t>> outputShape = {{ 2, 2 }};
+
+ TosaSerializationBasicBlock* basicBlock =
+ GetTosaMappingFromLayer(PolymorphicDowncast<Layer*>(add));
+ AssertTosaOneToOneMappingBasicBlock( basicBlock, inputShape, outputShape,
+ tosa::Op_MUL, Attribute_MulAttribute, BaseDescriptor(), LayerType::Multiplication);
+}
+
TEST_CASE("GetTosaMapping_AvgPool2DLayer")
{
Pooling2dDescriptor descriptor;
@@ -616,6 +664,64 @@ TEST_CASE("GetTosaMappingFromLayer_TransposeConv2dLayer")
LayerType::TransposeConvolution2d);
}
+TEST_CASE("GetTosaMapping_TransposeLayer")
+{
+ TensorInfo inputInfo = TensorInfo({ 1, 1, 5, 3 }, DataType::Float32, 0.0f, 0, true);
+ TensorInfo outputInfo = TensorInfo({ 1, 5, 1, 3 }, DataType::Float32, 0.0f, 0, true);
+
+ std::vector<std::vector<int32_t>> inputShape = {{ 1, 1, 5, 3 }};
+ std::vector<std::vector<int32_t>> outputShape = {{ 1, 5, 1, 3 }};
+
+ TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1 ,3 });
+
+ TosaSerializationBasicBlock* basicBlock =
+ GetTosaMapping(nullptr, LayerType::Transpose, {&inputInfo,}, {&outputInfo}, transposeDescriptor);
+ AssertTosaOneToOneMappingBasicBlock(basicBlock,
+ inputShape,
+ outputShape,
+ Op_TRANSPOSE,
+ Attribute_TransposeAttribute,
+ transposeDescriptor,
+ LayerType::Transpose);
+}
+
+TEST_CASE("GetTosaMappingFromLayer_TransposeLayer")
+{
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1 ,3 });
+
+ IConnectableLayer* input = net->AddInputLayer(0, "input0");
+ IConnectableLayer* transpose = net->AddTransposeLayer(transposeDescriptor, "transpose");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input->GetOutputSlot(0).Connect(transpose->GetInputSlot(0));
+ transpose->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo inputInfo = TensorInfo({ 1, 1, 5, 3 }, DataType::Float32, 0.0f, 0, true);
+ TensorInfo outputInfo = TensorInfo({ 1, 5, 1, 3 }, DataType::Float32, 0.0f, 0, true);
+
+ input->GetOutputSlot(0).SetTensorInfo(inputInfo);
+ transpose->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ std::vector<std::vector<int32_t>> inputShape = {{ 1, 1, 5, 3 }};
+ std::vector<std::vector<int32_t>> outputShape = {{ 1, 5, 1, 3 }};
+
+ TosaSerializationBasicBlock* basicBlock =
+ GetTosaMappingFromLayer(PolymorphicDowncast<Layer*>(transpose));
+ AssertTosaOneToOneMappingBasicBlock(basicBlock,
+ inputShape,
+ outputShape,
+ Op_TRANSPOSE,
+ Attribute_TransposeAttribute,
+ transposeDescriptor,
+ LayerType::Transpose);
+}
+
TEST_CASE("GetTosaMapping_Unimplemented")
{
TosaSerializationBasicBlock* basicBlock =
diff --git a/src/backends/tosaCommon/test/TosaTestUtils.hpp b/src/backends/tosaCommon/test/TosaTestUtils.hpp
index 140cb83983..e24055371f 100644
--- a/src/backends/tosaCommon/test/TosaTestUtils.hpp
+++ b/src/backends/tosaCommon/test/TosaTestUtils.hpp
@@ -158,6 +158,14 @@ inline void VerifyTosaAttribute(const BaseDescriptor& descriptor,
CHECK(stride == transposeConvAttribute.stride());
break;
}
+ case LayerType::Transpose:
+ {
+ auto transposeDesc = PolymorphicDowncast<const TransposeDescriptor*>(&descriptor);
+ std::vector<int> outPerm(transposeDesc->m_DimMappings.begin(), transposeDesc->m_DimMappings.end());
+ TosaTransposeAttribute transposeAttribute(attribute);
+ CHECK(outPerm == transposeAttribute.perms());
+ break;
+ }
default:
break;
}
diff --git a/src/backends/tosaReference/TosaRefLayerSupport.cpp b/src/backends/tosaReference/TosaRefLayerSupport.cpp
index 0d0d07a783..6113b5861a 100644
--- a/src/backends/tosaReference/TosaRefLayerSupport.cpp
+++ b/src/backends/tosaReference/TosaRefLayerSupport.cpp
@@ -38,6 +38,8 @@ bool TosaRefLayerSupport::IsLayerSupported(const LayerType& type,
case LayerType::Output:
return true;
case LayerType::Addition:
+ case LayerType::Multiplication:
+ case LayerType::Subtraction:
// Setup inputs and outputs
inputInfos.push_back(&infos[0]);
inputInfos.push_back(&infos[1]);
@@ -69,7 +71,7 @@ bool TosaRefLayerSupport::IsLayerSupported(const LayerType& type,
case LayerType::Pooling2d:
case LayerType::Reshape:
case LayerType::Slice:
- // Setup inputs and outputs
+ case LayerType::Transpose:
inputInfos.push_back(&infos[0]);
outputInfos.push_back(&infos[1]);
break;
diff --git a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
index a377293fbf..e19462e986 100644
--- a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
@@ -8,10 +8,13 @@
#include "backendsCommon/test/AdditionEndToEndTestImpl.hpp"
#include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp"
#include "backendsCommon/test/ConcatEndToEndTestImpl.hpp"
+#include "backendsCommon/test/MultiplicationEndToEndTestImpl.hpp"
#include "backendsCommon/test/Pooling2dEndToEndTestImpl.hpp"
#include "backendsCommon/test/ReshapeEndToEndTestImpl.hpp"
#include "backendsCommon/test/SliceEndToEndTestImpl.hpp"
+#include "backendsCommon/test/SubtractionEndToEndTestImpl.hpp"
#include "backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp"
+#include "backendsCommon/test/TransposeEndToEndTestImpl.hpp"
#include <doctest/doctest.h>
@@ -150,6 +153,35 @@ TEST_CASE("TosaRefSliceEndtoEndTestFloat16")
{
SliceEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
}
+TEST_CASE("TosaRefSubtractionEndtoEndTestFloat32")
+{
+ SubtractionEndToEnd<DataType::Float32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefSubtractionEndtoEndTestInt32")
+{
+ SubtractionEndToEnd<DataType::Signed32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefSubtractionEndtoEndTestFloat16")
+{
+ SubtractionEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestFloat32")
+{
+ MultiplicationEndToEnd<DataType::Float32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestInt32")
+{
+ MultiplicationEndToEnd<DataType::Signed32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestFloat16")
+{
+ MultiplicationEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
+}
// TransposeConvolution2d
TEST_CASE("TosaRefTransposeConvolution2dEndToEndFloatNhwcTest")
@@ -164,4 +196,10 @@ TEST_CASE("TosaRefSimpleTransposeConvolution2dEndToEndFloatNhwcTest")
tosaDefaultBackends, armnn::DataLayout::NHWC);
}
+// Transpose
+TEST_CASE("TosaRefTransposeEndtoEndTestFloat32")
+{
+ TransposeEndToEnd<armnn::DataType::Float32>(tosaDefaultBackends);
+}
+
}
\ No newline at end of file
diff --git a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
index 051965f541..66dfbe8dff 100644
--- a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
@@ -190,6 +190,50 @@ TEST_CASE("IsLayerSupportedTosaReferenceConv2dUnsupported")
CHECK(!supported);
}
+TEST_CASE("IsLayerSupportedTosaReferenceMultiplication")
+{
+ TensorShape shape0 = {1,1,3,4};
+ TensorShape shape1 = {1,1,3,4};
+ TensorShape outShape = {1,1,3,4};
+ TensorInfo in0(shape0, armnn::DataType::Float32);
+ TensorInfo in1(shape1, armnn::DataType::Float32);
+ TensorInfo out(outShape, armnn::DataType::Float32);
+
+ BaseDescriptor desc;
+ TosaRefLayerSupport supportChecker;
+ std::string reasonIfNotSupported;
+ auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Multiplication,
+ {in0, in1, out},
+ desc,
+ armnn::EmptyOptional(),
+ armnn::EmptyOptional(),
+ reasonIfNotSupported);
+
+ CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceMultiplicationUnsupported")
+{
+ TensorShape shape0 = {1,1,3,4};
+ TensorShape shape1 = {1,2,3,4};
+ TensorShape outShape = {1,1,3,4};
+ TensorInfo in0(shape0, armnn::DataType::Signed64);
+ TensorInfo in1(shape1, armnn::DataType::Signed64);
+ TensorInfo out(outShape, armnn::DataType::Signed64);
+
+ BaseDescriptor desc;
+ TosaRefLayerSupport supportChecker;
+ std::string reasonIfNotSupported;
+ auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Multiplication,
+ {in0, in1, out},
+ desc,
+ armnn::EmptyOptional(),
+ armnn::EmptyOptional(),
+ reasonIfNotSupported);
+
+ CHECK(!supported);
+}
+
TEST_CASE("IsLayerSupportedTosaReferenceMaxPooling2d")
{
TensorShape inShape = {1,1,3,4};
@@ -376,6 +420,50 @@ TEST_CASE("IsLayerSupportedTosaReferenceSliceUnsupported")
CHECK(!supported);
}
+TEST_CASE("IsLayerSupportedTosaReferenceSubtraction")
+{
+ TensorShape shape0 = {1,1,3,4};
+ TensorShape shape1 = {1,1,3,4};
+ TensorShape outShape = {1,1,3,4};
+ TensorInfo in0(shape0, armnn::DataType::Float32);
+ TensorInfo in1(shape1, armnn::DataType::Float32);
+ TensorInfo out(outShape, armnn::DataType::Float32);
+
+ BaseDescriptor desc;
+ TosaRefLayerSupport supportChecker;
+ std::string reasonIfNotSupported;
+ auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Subtraction,
+ {in0, in1, out},
+ desc,
+ armnn::EmptyOptional(),
+ armnn::EmptyOptional(),
+ reasonIfNotSupported);
+
+ CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceSubtractionUnsupported")
+{
+ TensorShape shape0 = {1,1,3,4};
+ TensorShape shape1 = {4};
+ TensorShape outShape = {1,1,3,4};
+ TensorInfo in0(shape0, armnn::DataType::Signed64);
+ TensorInfo in1(shape1, armnn::DataType::Signed64);
+ TensorInfo out(outShape, armnn::DataType::Signed64);
+
+ BaseDescriptor desc;
+ TosaRefLayerSupport supportChecker;
+ std::string reasonIfNotSupported;
+ auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Subtraction,
+ {in0, in1, out},
+ desc,
+ armnn::EmptyOptional(),
+ armnn::EmptyOptional(),
+ reasonIfNotSupported);
+
+ CHECK(!supported);
+}
+
TEST_CASE("IsLayerSupportedTosaReferenceTransposeConv2d")
{
TensorInfo inputInfo ({ 1, 3, 3, 1 }, DataType::Float32);
@@ -421,4 +509,46 @@ TEST_CASE("IsLayerSupportedTosaReferenceTransposeConv2dUnsupported")
CHECK(!supported);
}
+TEST_CASE("IsLayerSupportedTosaReferenceTranspose")
+{
+ TensorShape inShape = { 1, 1, 5, 3 };
+ TensorShape outShape = { 1, 5, 1, 3 };
+ TensorInfo in(inShape, DataType::Float32);
+ TensorInfo out(outShape, DataType::Float32);
+
+ TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1 ,3 });
+
+ TosaRefLayerSupport supportChecker;
+ std::string reasonIfNotSupported;
+ auto supported = supportChecker.IsLayerSupported(LayerType::Transpose,
+ {in, out},
+ transposeDescriptor,
+ EmptyOptional(),
+ EmptyOptional(),
+ reasonIfNotSupported);
+
+ CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceTransposeUnsupported")
+{
+ TensorShape inShape = { 1, 1, 5, 3 };
+ TensorShape outShape = { 1, 5, 1, 3 };
+ TensorInfo in(inShape, DataType::Signed64);
+ TensorInfo out(outShape, DataType::Signed64);
+
+ TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1 ,3 });
+
+ TosaRefLayerSupport supportChecker;
+ std::string reasonIfNotSupported;
+ auto supported = supportChecker.IsLayerSupported(LayerType::Transpose,
+ {in, out},
+ transposeDescriptor,
+ EmptyOptional(),
+ EmptyOptional(),
+ reasonIfNotSupported);
+
+ CHECK(!supported);
+}
+
}