| field     | value | |
|-----------|-------|--|
| author    | Cathal Corbett <cathal.corbett@arm.com> | 2023-01-12 11:17:03 +0000 |
| committer | Cathal Corbett <cathal.corbett@arm.com> | 2023-01-12 11:18:21 +0000 |
| commit    | d69c1c595375b904a7f19f562ac1d54098184b4e (patch) | |
| tree      | b2c4980eb367aa160282aae5c2deda8ef19682de /src | |
| parent    | 267c985a6322fbc1efa22ba44188ac867537f1b1 (diff) | |
| download  | armnn-d69c1c595375b904a7f19f562ac1d54098184b4e.tar.gz | |
Merge 'main' onto 'experimental/GpuFsa'.
* I6c71be11e9b73694747b27fe9febab8d9669b4d4
Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Iccaf50e2484559979d801ee9d0e130e848554733
Diffstat (limited to 'src')
65 files changed, 1983 insertions, 643 deletions
```diff
diff --git a/src/armnn/AsyncExecutionCallback.cpp b/src/armnn/AsyncExecutionCallback.cpp
index 5b87927af2..73ce66b7fb 100644
--- a/src/armnn/AsyncExecutionCallback.cpp
+++ b/src/armnn/AsyncExecutionCallback.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -11,6 +11,8 @@ namespace armnn
 namespace experimental
 {
 
+InferenceId AsyncExecutionCallback::nextID = 0u;
+
 void AsyncExecutionCallback::Notify(armnn::Status status, InferenceTimingPair timeTaken)
 {
     {
```

```diff
diff --git a/src/armnn/AsyncExecutionCallback.hpp b/src/armnn/AsyncExecutionCallback.hpp
index 9eab06b4fa..d48f80737d 100644
--- a/src/armnn/AsyncExecutionCallback.hpp
+++ b/src/armnn/AsyncExecutionCallback.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -68,7 +68,6 @@ private:
     armnn::Status m_Status = Status::Failure;
     InferenceId m_InferenceId;
 };
-InferenceId AsyncExecutionCallback::nextID = 0u;
 
 // Manager to create and monitor AsyncExecutionCallbacks
 // GetNewCallback will create a callback for use in Threadpool::Schedule
```

```diff
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index b5769f75f3..e5d123830c 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -497,13 +497,19 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const SubgraphView& substituteSubgraph)
         IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx);
         ARMNN_ASSERT(subgraphInputSlot);
 
-        IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
-        ARMNN_ASSERT(connectedOutputSlot);
-        connectedOutputSlot->Disconnect(*subgraphInputSlot);
+        // Only disconnect if the InputSlot has a connection, this might not be the case when
+        // dealing with working copies of SubgraphViews
+        // Note: we don't need this check for OutputSlot as it iterates over a vector of valid connections
+        if (subgraphInputSlot->GetConnection())
+        {
+            IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection();
+            ARMNN_ASSERT(connectedOutputSlot);
+            connectedOutputSlot->Disconnect(*subgraphInputSlot);
 
-        IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
-        ARMNN_ASSERT(substituteInputSlot);
-        connectedOutputSlot->Connect(*substituteInputSlot);
+            IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx);
+            ARMNN_ASSERT(substituteInputSlot);
+            connectedOutputSlot->Connect(*substituteInputSlot);
+        }
     }
 
     // Step 2: process output slots
```

```diff
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7b24fd77b8..b42874f29d 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -1328,6 +1328,7 @@ void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
     }
     else
     {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyInput");
         std::unique_ptr<ITensorHandle> tensorHandle =
             std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
@@ -1374,6 +1375,7 @@ void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
 
 void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
 {
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
     auto copyFunc = [](void* dst, const void* src, size_t size)
     {
         memcpy(dst, src, size);
```
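The AsyncExecutionCallback hunks above move the definition of the static `nextID` counter out of the header and into the .cpp file. A minimal standalone sketch (hypothetical `Widget` type, not Arm NN code) of why that matters:

```cpp
// widget.hpp — a non-inline static data member may only be *declared* here.
// Defining it in the header emits one definition per translation unit that
// includes it, violating the One Definition Rule at link time.
struct Widget
{
    static int nextId; // declaration only
};

// widget.cpp — exactly one definition for the whole program lives here,
// mirroring the move of AsyncExecutionCallback::nextID into the .cpp file.
int Widget::nextId = 0;
```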
```diff
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 158142f48e..42388bfbd7 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -626,7 +626,14 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
             // need to set the compute device on the layer
             // before we can check if it is supported
             layer->SetBackendId(backend);
-            if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
+
+            // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
+            // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
+            // to be FP32 and inserting convert layers around the FP32 operator.
+            bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
+            std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
+            if (!isLayerSupported ||
+                reasonIfUnsupported.find(checkStr) != std::string::npos)
             {
                 if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
                 {
@@ -1568,8 +1575,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                               const OptimizerOptions& options,
                               Optional<std::vector<std::string>&> messages)
 {
-    const auto start_time = armnn::GetTimeNow();
-
     ARMNN_LOG(debug) << options.ToString();
 
     // Enable profiling
@@ -1750,9 +1755,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
     }
 
-    ARMNN_LOG(info) << "!! New time !! : " << std::setprecision(2)
-                    << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms.";
-
     return optNet;
 }
```
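The Network.cpp hunk keys the FP32 fallback off a fixed substring of `reasonIfUnsupported`. A standalone sketch of that decision (hypothetical helper name, not the Arm NN API):

```cpp
#include <string>

// Retry a layer as FP32 (with convert layers inserted around it) when the
// backend either rejects it outright or reports the v8.2 FP16 limitation.
bool ShouldAttemptFp32Fallback(bool isLayerSupported, const std::string& reasonIfUnsupported)
{
    const std::string checkStr =
        "This CPU architecture does not support F16 data type, you need v8.2 or above";
    return !isLayerSupported || reasonIfUnsupported.find(checkStr) != std::string::npos;
}
```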
```diff
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 75b1ee8179..ff64e856f4 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -20,7 +20,10 @@
 #include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnn/utility/Timer.hpp>
 
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
 #include <backendsCommon/DynamicBackendUtils.hpp>
+#endif
+
 #include <backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp>
 
 #include <client/include/backends/IBackendProfiling.hpp>
@@ -334,11 +337,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
         throw RuntimeException(
                 "It is not possible to enable timeline reporting without profiling being enabled");
     }
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
     // Load any available/compatible dynamic backend before the runtime
     // goes through the backend registry
     LoadDynamicBackends(options.m_DynamicBackendsPath);
-
+#endif
     armnn::BackendIdSet supportedBackends;
     for (const auto& id : BackendRegistryInstance().GetBackendIds())
     {
@@ -354,9 +357,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
             if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end() &&
                 customAllocatorMapIterator->second == nullptr)
             {
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
                 // We need to manually clean up the dynamic backends before throwing an exception.
                 DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
                 m_DeviceSpec.ClearDynamicBackends();
+#endif
                 throw armnn::Exception("Allocator associated with id " + id.Get() + " is null");
             }
@@ -393,6 +398,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
                 }
                 // No errors so register the Custom Allocator with the BackendRegistry
                 BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+                m_AllocatorsAddedByThisRuntime.emplace(id);
             }
             else
             {
@@ -428,6 +434,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options)
                 }
                 // No errors so register the Custom Allocator with the BackendRegistry
                 BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second);
+                m_AllocatorsAddedByThisRuntime.emplace(id);
             }
         }
@@ -577,13 +584,20 @@ RuntimeImpl::~RuntimeImpl()
                            << std::endl;
         }
     }
-
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
     // Clear all dynamic backends.
     DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends());
     m_DeviceSpec.ClearDynamicBackends();
+#endif
     m_BackendContexts.clear();
 
     BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional());
+    // Remove custom allocators that this runtime has added.
+    // Note: that as backends can be per process and there can be many instances of a runtime in a process an allocator
+    // may have been overwritten by another runtime.
+    for_each(m_AllocatorsAddedByThisRuntime.begin(), m_AllocatorsAddedByThisRuntime.end(),
+             [](BackendId id) {BackendRegistryInstance().DeregisterAllocator(id);});
+
     ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2)
                     << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
 }
@@ -755,6 +769,7 @@ void RuntimeImpl::RegisterDebugCallback(NetworkId networkId, const DebugCallback
     loadedNetwork->RegisterDebugCallback(func);
 }
 
+#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC)
 void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
 {
     // Get the paths where to load the dynamic backends from
@@ -772,5 +787,5 @@ void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath)
     // Add the registered dynamic backend ids to the list of supported backends
     m_DeviceSpec.AddSupportedBackends(registeredBackendIds, true);
 }
-
+#endif
 } // namespace armnn
```

```diff
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index f5dfadf948..9d47b7898d 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -157,6 +157,10 @@ private:
 
     /// Profiling Service Instance
     std::unique_ptr<arm::pipe::IProfilingService> m_ProfilingService;
+
+    /// Keep track of backend ids of the custom allocators that this instance of the runtime added. The
+    /// destructor can then clean up for this runtime.
+    std::set<BackendId> m_AllocatorsAddedByThisRuntime;
 };
 
 } // namespace armnn
```

```diff
diff --git a/src/armnn/TypesUtils.cpp b/src/armnn/TypesUtils.cpp
index 4ba9ed19e1..74ac231bc9 100644
--- a/src/armnn/TypesUtils.cpp
+++ b/src/armnn/TypesUtils.cpp
@@ -81,4 +81,8 @@ float armnn::Dequantize<int16_t>(int16_t value, float scale, int32_t offset);
 
 /// Explicit specialization of Dequantize for int32_t
 template
-float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
\ No newline at end of file
+float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
+
+/// Explicit specialization of Dequantize for int64_t
+template
+float armnn::Dequantize<int64_t>(int64_t value, float scale, int32_t offset);
```
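For reference, `armnn::Dequantize` maps a quantized value back to float as `scale * (value - offset)`. A hedged usage sketch of the new `int64_t` specialization (declaration assumed to live in `armnn/TypesUtils.hpp`):

```cpp
#include <armnn/TypesUtils.hpp>

#include <cstdint>
#include <iostream>

int main()
{
    // realValue = scale * (quantizedValue - offset) => 0.1f * (150 - 0) == 15.0f
    const int64_t quantized = 150;
    const float real = armnn::Dequantize<int64_t>(quantized, /*scale=*/0.1f, /*offset=*/0);
    std::cout << real << std::endl;
}
```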
```diff
diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp
index 4ce67b0fec..9bb5e69bbb 100644
--- a/src/armnn/test/SubgraphViewTests.cpp
+++ b/src/armnn/test/SubgraphViewTests.cpp
@@ -2063,6 +2063,35 @@ TEST_CASE("SubgraphViewWorkingCopySubstituteSubgraph")
     CHECK_THROWS_AS(workingCopy.GetWorkingCopy(), Exception);
 }
 
+TEST_CASE("SubgraphViewPartialWorkingCopySubstituteSubgraph")
+{
+    Graph graph;
+
+    auto input = graph.AddLayer<InputLayer>(0, "Input");
+    auto activation = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation");
+    auto output = graph.AddLayer<OutputLayer>(1, "Output");
+
+    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    //Add in out of order
+    auto view = CreateSubgraphViewFrom({activation},
+                                       {&activation->GetInputSlot(0)},
+                                       {&activation->GetOutputSlot(0)});
+
+    auto workingCopy = view->GetWorkingCopy();
+
+    // First (and only) layer in the subgraph is the Activation
+    CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation");
+
+    // Substitute the "Activation" layer for an equivalent layer
+    auto activation2 = graph.AddLayer<ActivationLayer>(ActivationDescriptor{}, "Activation2");
+    SubgraphView pattern(*workingCopy.beginIConnectable());
+    workingCopy.SubstituteSubgraph(pattern, activation2);
+
+    CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation2");
+}
+
 TEST_CASE("SubgraphViewWorkingCopyOptimizationViews")
 {
     Graph graph;
```
```diff
diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp
index 63fb60382c..552d4e4163 100644
--- a/src/armnnOnnxParser/OnnxParser.cpp
+++ b/src/armnnOnnxParser/OnnxParser.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include "OnnxParser.hpp"
@@ -50,6 +50,17 @@ armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinaryFile(const char* graphFile)
     return pOnnxParserImpl->CreateNetworkFromBinaryFile(graphFile);
 }
 
+armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent)
+{
+    return pOnnxParserImpl->CreateNetworkFromBinary(binaryContent);
+}
+
+armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent,
+                                                        const std::map<std::string, armnn::TensorShape>& inputShapes)
+{
+    return pOnnxParserImpl->CreateNetworkFromBinary(binaryContent, inputShapes);
+}
+
 armnn::INetworkPtr IOnnxParser::CreateNetworkFromTextFile(const char* graphFile)
 {
     return pOnnxParserImpl->CreateNetworkFromTextFile(graphFile);
@@ -731,6 +742,44 @@ INetworkPtr OnnxParserImpl::CreateNetworkFromTextFile(const char* graphFile,
     return CreateNetworkFromModel(*modelProto);
 }
 
+INetworkPtr OnnxParserImpl::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent)
+{
+    ResetParser();
+    ModelPtr modelProto = LoadModelFromBinary(binaryContent);
+    return CreateNetworkFromModel(*modelProto);
+}
+
+INetworkPtr OnnxParserImpl::CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent,
+                                                    const std::map<std::string, armnn::TensorShape>& inputShapes)
+{
+    ResetParser();
+    m_InputShapes = inputShapes;
+    ModelPtr modelProto = LoadModelFromBinary(binaryContent);
+    return CreateNetworkFromModel(*modelProto);
+}
+
+ModelPtr OnnxParserImpl::LoadModelFromBinary(const std::vector<uint8_t>& binaryContent)
+{
+    if (binaryContent.size() == 0)
+    {
+        throw ParseException(fmt::format("Missing binary content", CHECK_LOCATION().AsString()));
+    }
+    // Parse the file into a message
+    ModelPtr modelProto = std::make_unique<onnx::ModelProto>();
+
+    google::protobuf::io::CodedInputStream codedStream(binaryContent.data(), static_cast<int>(binaryContent.size()));
+    codedStream.SetTotalBytesLimit(INT_MAX);
+    bool success = modelProto.get()->ParseFromCodedStream(&codedStream);
+
+    if (!success)
+    {
+        std::stringstream error;
+        error << "Failed to parse graph";
+        throw ParseException(fmt::format("{} {}", error.str(), CHECK_LOCATION().AsString()));
+    }
+    return modelProto;
+}
+
 ModelPtr OnnxParserImpl::LoadModelFromBinaryFile(const char* graphFile)
 {
     FILE* fd = fopen(graphFile, "rb");
```
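A hedged usage sketch of the new in-memory overload: parse an ONNX model that has already been read into a byte vector, instead of going through `CreateNetworkFromBinaryFile`. Error handling is omitted; how `modelBytes` is filled is up to the caller.

```cpp
#include <armnnOnnxParser/IOnnxParser.hpp>

#include <cstdint>
#include <vector>

armnn::INetworkPtr ParseOnnxFromMemory(const std::vector<uint8_t>& modelBytes)
{
    armnnOnnxParser::IOnnxParserPtr parser = armnnOnnxParser::IOnnxParser::Create();
    // Throws a ParseException if modelBytes is empty or is not a valid
    // onnx::ModelProto, per LoadModelFromBinary above.
    return parser->CreateNetworkFromBinary(modelBytes);
}
```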
```diff
diff --git a/src/armnnOnnxParser/OnnxParser.hpp b/src/armnnOnnxParser/OnnxParser.hpp
index bb94472c6d..c9f321a5b5 100644
--- a/src/armnnOnnxParser/OnnxParser.hpp
+++ b/src/armnnOnnxParser/OnnxParser.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -38,6 +38,13 @@ public:
     armnn::INetworkPtr CreateNetworkFromBinaryFile(const char* graphFile,
                                                    const std::map<std::string, armnn::TensorShape>& inputShapes);
 
+    /// Create the network from a protobuf binary
+    armnn::INetworkPtr CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent);
+
+    /// Create the network from a protobuf binary, with inputShapes specified
+    armnn::INetworkPtr CreateNetworkFromBinary(const std::vector<uint8_t>& binaryContent,
+                                               const std::map<std::string, armnn::TensorShape>& inputShapes);
+
     /// Create the network from a protobuf text file on disk
     armnn::INetworkPtr CreateNetworkFromTextFile(const char* graphFile);
 
@@ -64,6 +71,7 @@ public:
     OnnxParserImpl();
     ~OnnxParserImpl() = default;
 
+    static ModelPtr LoadModelFromBinary(const std::vector<uint8_t>& binaryContent);
     static ModelPtr LoadModelFromBinaryFile(const char * fileName);
     static ModelPtr LoadModelFromTextFile(const char * fileName);
     static ModelPtr LoadModelFromString(const std::string& inputString);
```

```diff
diff --git a/src/armnnSerializer/CMakeLists.txt b/src/armnnSerializer/CMakeLists.txt
index 8acdafbc28..01c51e65d7 100755
--- a/src/armnnSerializer/CMakeLists.txt
+++ b/src/armnnSerializer/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017, 2019-2020, 2022 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017, 2019-2020, 2022-2023 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
 if(BUILD_ARMNN_SERIALIZER)
@@ -36,7 +36,7 @@ if(BUILD_ARMNN_SERIALIZER)
         ../armnnDeserializer/Deserializer.cpp
         )
 
-    if(BUILD_BARE_METAL)
+    if(BUILD_BARE_METAL OR EXECUTE_NETWORK_STATIC)
         add_library_ex(armnnSerializer STATIC ${armnn_serializer_sources})
     else()
         # We're going to export both a STATIC library and a SHARED library here.
@@ -52,9 +52,11 @@ if(BUILD_ARMNN_SERIALIZER)
     target_include_directories(armnnSerializer PRIVATE ../armnn)
     target_include_directories(armnnSerializer PRIVATE ../armnnUtils)
     target_include_directories(armnnSerializer PRIVATE ../../generated)
-    target_include_directories(armnnSerializer-static PRIVATE ../armnn)
-    target_include_directories(armnnSerializer-static PRIVATE ../armnnUtils)
-    target_include_directories(armnnSerializer-static PRIVATE ../../generated)
+    if (NOT BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
+        target_include_directories(armnnSerializer-static PRIVATE ../armnn)
+        target_include_directories(armnnSerializer-static PRIVATE ../armnnUtils)
+        target_include_directories(armnnSerializer-static PRIVATE ../../generated)
+    endif()
 
     list(APPEND armnn_serializer_sources
         ArmnnSchema_generated.h
@@ -64,12 +66,13 @@ if(BUILD_ARMNN_SERIALIZER)
     target_include_directories(armnnSerializer SYSTEM PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
 
     target_link_libraries(armnnSerializer armnn ${FLATBUFFERS_LIBRARY})
-
-    install(TARGETS armnnSerializer-static
-            EXPORT  armnn-targets
-            LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            )
+    if (NOT BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
+        install(TARGETS armnnSerializer-static
+                EXPORT  armnn-targets
+                LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+                ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+                )
+    endif()
 
     install(TARGETS armnnSerializer
             EXPORT  armnn-targets
```
```diff
diff --git a/src/armnnTestUtils/CMakeLists.txt b/src/armnnTestUtils/CMakeLists.txt
index 3f6fb415a2..a4333cf306 100755
--- a/src/armnnTestUtils/CMakeLists.txt
+++ b/src/armnnTestUtils/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
@@ -31,7 +31,7 @@ list(APPEND armnnTestUtils_sources
     TestUtils.hpp
     )
 
-if(NOT BUILD_BARE_METAL)
+if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
     list(APPEND armnnTestUtils_sources
         UnitTests.cpp
         UnitTests.hpp
@@ -41,6 +41,8 @@ endif()
 
 if(BUILD_BARE_METAL)
     add_library_ex(armnnTestUtils STATIC ${armnnTestUtils_sources})
+elseif(EXECUTE_NETWORK_STATIC)
+    add_library_ex(armnnTestUtils OBJECT ${armnnTestUtils_sources})
 else()
     add_library_ex(armnnTestUtils SHARED ${armnnTestUtils_sources})
 endif()
```

```diff
diff --git a/src/armnnTfLiteParser/CMakeLists.txt b/src/armnnTfLiteParser/CMakeLists.txt
index f9653b6752..6096d1bf8c 100755
--- a/src/armnnTfLiteParser/CMakeLists.txt
+++ b/src/armnnTfLiteParser/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017 Arm Ltd. All rights reserved.
+# Copyright © 2017, 2023 Arm Ltd. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
 if(BUILD_TF_LITE_PARSER)
@@ -11,7 +11,11 @@ if(BUILD_TF_LITE_PARSER)
         TfLiteParser.cpp
         )
 
-    add_library_ex(armnnTfLiteParser SHARED ${armnn_tf_lite_parser_sources})
+    if(EXECUTE_NETWORK_STATIC)
+        add_library_ex(armnnTfLiteParser OBJECT ${armnn_tf_lite_parser_sources})
+    else()
+        add_library_ex(armnnTfLiteParser SHARED ${armnn_tf_lite_parser_sources})
+    endif()
 
     include_directories(SYSTEM "${FLATBUFFERS_INCLUDE_PATH}")
     set_target_properties(armnnTfLiteParser PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
```
{}", + GetDataTypeName(info.GetDataType()), + location.AsString())); + } return buffer; } @@ -911,42 +919,16 @@ INetworkPtr TfLiteParserImpl::CreateNetworkFromModel() return std::move(m_Network); } -std::unique_ptr<float[]> AsFloatArray(TfLiteParserImpl::BufferRawPtr bufferPtr, - const TensorInfo& tensorInfo) +bool TfLiteParserImpl::ShouldConstantTensorBeConverted(TfLiteParserImpl::TensorRawPtr tensorPtr, + armnn::DataType inputDataType, + armnn::DataType tensorDataType) { - if (tensorInfo.GetDataType() == DataType::QAsymmS8 || tensorInfo.GetDataType() == DataType::QSymmS8 || - tensorInfo.GetDataType() == DataType::QAsymmU8) - { - std::unique_ptr<float[]> buffer(new float[tensorInfo.GetNumElements()]); - - if (tensorInfo.HasPerAxisQuantization()) - { - unsigned int axis = tensorInfo.GetQuantizationDim().value(); - auto axisDimensionality = tensorInfo.GetShape()[axis]; - auto axisFactor = armnnUtils::GetNumElementsAfter(tensorInfo.GetShape(), axis); - - for (unsigned int i = 0; i < tensorInfo.GetNumDimensions(); ++i) - { - unsigned int axisIndex = (i / axisFactor) % axisDimensionality; - buffer[i] = Dequantize<int8_t>(bufferPtr->data[i], tensorInfo.GetQuantizationScales()[axisIndex], - tensorInfo.GetQuantizationOffset()); - } - } - else - { - for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i) - { - buffer[i] = Dequantize<int8_t>(bufferPtr->data[i], tensorInfo.GetQuantizationScale(), - tensorInfo.GetQuantizationOffset()); - } - } - return buffer; - } - throw ParseException( - fmt::format("Unsupported input/weights combination: Input {} not supported with Weights {}", - GetDataTypeName(DataType::Float32), - GetDataTypeName(tensorInfo.GetDataType()), - CHECK_LOCATION().AsString())); + return (TfLiteParserImpl::IsConstTensor(tensorPtr) && inputDataType == DataType::Float32 && + (tensorDataType == DataType::QAsymmU8 || + tensorDataType == DataType::QAsymmS8 || + tensorDataType == DataType::QSymmS8 || + tensorDataType == DataType::Signed32 || + tensorDataType == DataType::Signed64)); } void TfLiteParserImpl::RegisterProducerOfTensor(size_t subgraphIndex, @@ -1136,9 +1118,7 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) auto layerName = fmt::format("Conv2D:{}:{}", subgraphIndex, operatorIndex); armnn::IConnectableLayer* layer = m_Network->AddConvolution2dLayer(desc, layerName.c_str()); - if (IsConstTensor(inputs[1]) && inputTensorInfo.GetDataType() == DataType::Float32 && - (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || - filterTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[1], inputTensorInfo.GetDataType(), filterTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[1]->buffer); } @@ -1150,9 +1130,7 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) // Add the biases input to the registration list, a constant layer will be added by SetupConstantLayers. 
tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); - if (IsConstTensor(inputs[2]) && inputTensorInfo.GetDataType() == DataType::Float32 && - (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || - filterTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[2], inputTensorInfo.GetDataType(), biasTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[2]->buffer); } @@ -3112,9 +3090,7 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator // Add the weights input to the registration list, constant layers will be added by SetupConstantLayers if constant. tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]); - if (desc.m_ConstantWeights && inputTensorInfo.GetDataType() == DataType::Float32 && - (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || - filterTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[1], inputTensorInfo.GetDataType(), filterTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[1]->buffer); } @@ -3127,9 +3103,7 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator // Add the biases input to the registration list, constant layer will be added by SetupConstantLayers. tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); - if (desc.m_ConstantWeights && inputTensorInfo.GetDataType() == DataType::Float32 && - (biasTensorInfo.GetDataType() == DataType::QAsymmU8 || - biasTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[2], inputTensorInfo.GetDataType(), biasTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[2]->buffer); } @@ -4925,11 +4899,22 @@ TfLiteParserImpl::CreateConstTensorNonPermuted(TensorRawPtr tensorPtr, // Make sure isConstant flag is set. 
```diff
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index f8ddc55649..7eb6c48501 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -242,7 +242,13 @@ private:
     };
 
     bool ShouldConstantTensorBeCreated(unsigned int tensorIndex);
+
     bool IsConstTensor(TensorRawPtr tensorPtr);
+
+    bool ShouldConstantTensorBeConverted(TfLiteParserImpl::TensorRawPtr tensorPtr,
+                                         armnn::DataType inputDataType,
+                                         armnn::DataType filterDataType);
+
     armnn::ConstTensor CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
                                                     armnn::TensorInfo& tensorInfo);
 
@@ -250,6 +256,7 @@ private:
     CreateConstTensorPermuted(TensorRawPtr tensorPtr,
                               armnn::TensorInfo& tensorInfo,
                               armnn::Optional<armnn::PermutationVector&> permutationVector);
+
     std::pair<armnn::ConstTensor, std::unique_ptr<float[]>>
     CreateConstTensorNonPermuted(TensorRawPtr tensorPtr,
                                  armnn::TensorInfo& tensorInfo,
@@ -261,6 +268,7 @@ private:
         TfLiteParserImpl::TensorRawPtr tensorPtr,
         armnn::TensorInfo& tensorInfo,
         armnn::Optional<armnn::PermutationVector&> permutationVector);
+
     std::pair<armnn::ConstTensor*, std::unique_ptr<float[]>>
     CreateConstTensorPtr(TensorRawPtr tensorPtr,
                          armnn::TensorInfo& inputTensorInfo);
```

```diff
diff --git a/src/armnnTfLiteParser/test/Conv2D.cpp b/src/armnnTfLiteParser/test/Conv2D.cpp
index 45c4a43519..334c102344 100644
--- a/src/armnnTfLiteParser/test/Conv2D.cpp
+++ b/src/armnnTfLiteParser/test/Conv2D.cpp
@@ -673,7 +673,7 @@ struct Conv2FloatWithInt8WeightsAndBiasesFixture : Conv2DWithBiasesFixture
                                  "[ 1, 2, 2, 1 ]",      // filterShape
                                  "[ 2,1, 0,6 ]",        // filterData
                                  "[ 1 ]",               // biasShape
-                                 "[ 10, 0, 0, 0 ]",     // biasData
+                                 "[ 10 ]",              // biasData
                                  "1",                   // stride w and h
                                  "NONE",                // activation
                                  "1.0",                 // filterScale
```
```diff
diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp
index d77f5d74c3..9e3d719211 100644
--- a/src/armnnUtils/TensorUtils.cpp
+++ b/src/armnnUtils/TensorUtils.cpp
@@ -128,12 +128,11 @@ TensorShape ExpandDims(const TensorShape& tensorShape, int axis)
     }
 
     outputShape.insert(outputShape.begin() + axis, 1);
-    return TensorShape(outputDim, outputShape.data());
+    return { outputDim, outputShape.data() };
 }
 
 std::vector<unsigned int> SqueezeDims(const TensorShape& tensorShape)
 {
-    unsigned int outputDimSize = 0;
     std::vector<unsigned int> squeezedDims;
 
     for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i)
@@ -141,7 +140,6 @@ std::vector<unsigned int> SqueezeDims(const TensorShape& tensorShape)
         if (tensorShape[i] != 1)
         {
             squeezedDims.push_back(tensorShape[i]);
-            ++outputDimSize;
         }
     }
     return squeezedDims;
@@ -201,4 +199,91 @@ std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::TensorInfo& info)
     return { axisFactor, scales };
 }
 
+template<typename PrimitiveType>
+void CheckSizes(const std::vector<PrimitiveType>& data, const armnn::TensorInfo& tensorInfo, unsigned int size = 1)
+{
+    if (data.size() / size != tensorInfo.GetNumElements())
+    {
+        throw InvalidArgumentException(
+            fmt::format("The data does not contain the expected number of elements {} != {}. {}",
+                        data.size(), tensorInfo.GetNumElements(), CHECK_LOCATION().AsString()));
+    }
+}
+
+template<typename PrimitiveType>
+std::unique_ptr<float[]> ToFloatArray(const std::vector<PrimitiveType>& data, const armnn::TensorInfo& tensorInfo)
+{
+    CheckSizes(data, tensorInfo);
+
+    std::unique_ptr<float[]> returnBuffer(new float[tensorInfo.GetNumElements()]);
+
+    if (tensorInfo.HasPerAxisQuantization())
+    {
+        unsigned int axis = tensorInfo.GetQuantizationDim().value();
+        auto axisDimensionality = tensorInfo.GetShape()[axis];
+        auto axisFactor = armnnUtils::GetNumElementsAfter(tensorInfo.GetShape(), axis);
+
+        for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i)
+        {
+            unsigned int axisIndex;
+
+            if (i < axisFactor)
+            {
+                axisIndex = 0;
+            }
+            else
+            {
+                axisIndex = (i / axisFactor) % axisDimensionality;
+            }
+            returnBuffer[i] = Dequantize<PrimitiveType>(data[i],
+                                                        tensorInfo.GetQuantizationScales()[axisIndex],
+                                                        tensorInfo.GetQuantizationOffset());
+        }
+    }
+    else
+    {
+        for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i)
+        {
+            returnBuffer[i] = Dequantize<PrimitiveType>(data[i],
+                                                        tensorInfo.GetQuantizationScale(),
+                                                        tensorInfo.GetQuantizationOffset());
+        }
+    }
+    return returnBuffer;
+}
+
+std::unique_ptr<float[]> ToFloatArray(const std::vector<uint8_t>& data, const armnn::TensorInfo& tensorInfo)
+{
+    if (tensorInfo.GetDataType() == DataType::QAsymmS8 || tensorInfo.GetDataType() == DataType::QSymmS8)
+    {
+        CheckSizes(data, tensorInfo);
+        std::vector<int8_t> buffer(tensorInfo.GetNumElements());
+        ::memcpy(buffer.data(), data.data(), data.size());
+        return ToFloatArray<int8_t>(buffer, tensorInfo);
+    }
+    else if (tensorInfo.GetDataType() == DataType::QAsymmU8)
+    {
+        CheckSizes(data, tensorInfo);
+        return ToFloatArray<uint8_t>(data, tensorInfo);
+    }
+    else if (tensorInfo.GetDataType() == DataType::Signed32)
+    {
+        CheckSizes(data, tensorInfo, 4);
+        std::vector<int32_t> buffer(tensorInfo.GetNumElements());
+        ::memcpy(buffer.data(), data.data(), data.size());
+        return ToFloatArray<int32_t>(buffer, tensorInfo);
+    }
+    else if (tensorInfo.GetDataType() == DataType::Signed64)
+    {
+        CheckSizes(data, tensorInfo, 8);
+        std::vector<int64_t> buffer(tensorInfo.GetNumElements());
+        ::memcpy(buffer.data(), data.data(), data.size());
+        return ToFloatArray<int64_t>(buffer, tensorInfo);
+    }
+    throw InvalidArgumentException(
+        fmt::format("Unsupported datatype {}. {}",
+                    GetDataTypeName(tensorInfo.GetDataType()),
+                    CHECK_LOCATION().AsString()));
+}
+
 } // namespace armnnUtils
```
```diff
diff --git a/src/armnnUtils/test/TensorUtilsTest.cpp b/src/armnnUtils/test/TensorUtilsTest.cpp
index 6d5f719eb1..16349c554e 100644
--- a/src/armnnUtils/test/TensorUtilsTest.cpp
+++ b/src/armnnUtils/test/TensorUtilsTest.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2019 Arm Ltd. All rights reserved.
+// Copyright © 2019,2021-2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -134,4 +134,175 @@ TEST_CASE("ExpandDimsInvalidNegativeAxisTest")
     CHECK_THROWS_AS(ExpandDims(inputShape, -5), armnn::InvalidArgumentException);
 }
 
+TEST_CASE("ToFloatArrayInvalidDataType")
+{
+    armnn::TensorInfo info({ 2, 3, 4 }, armnn::DataType::BFloat16);
+    std::vector<uint8_t> data {1,2,3,4,5,6,7,8,9,10};
+
+    // Invalid argument
+    CHECK_THROWS_AS(ToFloatArray(data, info), armnn::InvalidArgumentException);
+}
+
+TEST_CASE("ToFloatArrayQSymmS8PerAxis")
+{
+    std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+    unsigned int quantizationDim = 1;
+
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QSymmS8, quantizationScales, quantizationDim);
+    std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+    float expected[] { 10.0f, 24.0f, -37.8f, -46.4f, -10.6f, -19.2f, -25.8f, -30.4f, -6.6f, -11.2f, -13.8f, -14.4f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArrayQSymmS8")
+{
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QSymmS8, 0.1f);
+    std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+    float expected[] { 10.0f, 12.0f, -12.6f, -11.6f, -10.6f, -9.6f, -8.6f, -7.6f, -6.6f, -5.6f, -4.6f, -3.6f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArrayQAsymmS8PerAxis")
+{
+    std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+    unsigned int quantizationDim = 1;
+
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmS8, quantizationScales, quantizationDim);
+    std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+    float expected[] { 10.0f, 24.0f, -37.8f, -46.4f, -10.6f, -19.2f, -25.8f, -30.4f, -6.6f, -11.2f, -13.8f, -14.4f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArrayQAsymmS8")
+{
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmS8, 0.1f);
+    std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 };
+    float expected[] { 10.0f, 12.0f, -12.6f, -11.6f, -10.6f, -9.6f, -8.6f, -7.6f, -6.6f, -5.6f, -4.6f, -3.6f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArrayQASymmU8PerAxis")
+{
+    std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+    unsigned int quantizationDim = 1;
+
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmU8, quantizationScales, quantizationDim);
+    std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220 };
+    float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArrayQAsymmU8")
+{
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmU8, 0.1f);
+    std::vector<uint8_t> data { 100, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220 };
+    float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArraySigned32PerAxis")
+{
+    std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+    unsigned int quantizationDim = 1;
+
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed32, quantizationScales, quantizationDim);
+    std::vector<uint8_t> data { 100, 0, 0, 0, 120, 0, 0, 0, 130, 0, 0, 0, 140, 0, 0, 0, 150, 0, 0, 0, 160, 0, 0, 0,
+                                170, 0, 0, 0, 180, 0, 0, 0, 190, 0, 0, 0, 200, 0, 0, 0, 210, 0, 0, 0, 220, 0, 0, 0 };
+    float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArraySigned32")
+{
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed32, 0.1f);
+    std::vector<uint8_t> data { 100, 0, 0, 0, 120, 0, 0, 0, 130, 0, 0, 0, 140, 0, 0, 0, 150, 0, 0, 0, 160, 0, 0, 0,
+                                170, 0, 0, 0, 180, 0, 0, 0, 190, 0, 0, 0, 200, 0, 0, 0, 210, 0, 0, 0, 220, 0, 0, 0 };
+    float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArraySigned64PerAxis")
+{
+    std::vector<float> quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f };
+    unsigned int quantizationDim = 1;
+
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed64, quantizationScales, quantizationDim);
+    std::vector<uint8_t> data { 100, 0, 0, 0, 0, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0, 0, 0, 0,
+                                140, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0,
+                                170, 0, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0,
+                                200, 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0 };
+    float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
+
+TEST_CASE("ToFloatArraySigned64")
+{
+    armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed64, 0.1f);
+    std::vector<uint8_t> data { 100, 0, 0, 0, 0, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0, 0, 0, 0,
+                                140, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0,
+                                170, 0, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0,
+                                200, 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0 };
+    float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f };
+
+    std::unique_ptr<float[]> result = ToFloatArray(data, info);
+
+    for (uint i = 0; i < info.GetNumElements(); ++i)
+    {
+        CHECK_EQ(result[i], doctest::Approx(expected[i]));
+    }
+}
 }
```
```diff
diff --git a/src/backends/aclCommon/ArmComputeTuningUtils.cpp b/src/backends/aclCommon/ArmComputeTuningUtils.cpp
new file mode 100644
index 0000000000..4680541ae5
--- /dev/null
+++ b/src/backends/aclCommon/ArmComputeTuningUtils.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmComputeTuningUtils.hpp"
+
+namespace armnn
+{
+
+IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(IGpuAccTunedParameters::Mode mode,
+                                                          IGpuAccTunedParameters::TuningLevel tuningLevel)
+{
+    return new ClTunedParameters(mode, tuningLevel);
+}
+
+IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(IGpuAccTunedParameters::Mode mode,
+                                                         IGpuAccTunedParameters::TuningLevel tuningLevel)
+{
+    return IGpuAccTunedParametersPtr(CreateRaw(mode, tuningLevel), &IGpuAccTunedParameters::Destroy);
+}
+
+void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params)
+{
+    delete params;
+}
+
+ClTunedParameters::ClTunedParameters(IGpuAccTunedParameters::Mode mode,
+                                     IGpuAccTunedParameters::TuningLevel tuningLevel)
+    : m_Mode(mode)
+    , m_TuningLevel(tuningLevel)
+    , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters)
+{
+}
+
+void ClTunedParameters::Load(const char* filename)
+{
+    try
+    {
+        m_Tuner.load_from_file(filename);
+    }
+    catch (const std::exception& e)
+    {
+        throw Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + e.what());
+    }
+}
+
+void ClTunedParameters::Save(const char* filename) const
+{
+    try
+    {
+        m_Tuner.save_to_file(filename);
+    }
+    catch (const std::exception& e)
+    {
+        throw Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + e.what());
+    }
+}
+
+}
\ No newline at end of file
```
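These utilities back the pre-existing `IGpuAccTunedParameters` public API. A hedged usage sketch (the `Mode`/`TuningLevel` enumerator names are assumed from `armnn/IRuntime.hpp`): generate a tuning table on the GpuAcc backend and persist it with the `Save` method defined above.

```cpp
#include <armnn/IRuntime.hpp>

void TuneAndSave(const char* tuningFile)
{
    using TunedParams = armnn::IGpuAccTunedParameters;
    armnn::IGpuAccTunedParametersPtr tunedParams =
        TunedParams::Create(TunedParams::Mode::UpdateTunedParameters,
                            TunedParams::TuningLevel::Rapid);

    armnn::IRuntime::CreationOptions options;
    options.m_GpuAccTunedParameters = tunedParams;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

    // ... load and execute a network on GpuAcc so the CLTuner records kernel configurations ...

    tunedParams->Save(tuningFile); // throws armnn::Exception on failure, as defined above
}
```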
```diff
diff --git a/src/backends/aclCommon/ArmComputeTuningUtils.hpp b/src/backends/aclCommon/ArmComputeTuningUtils.hpp
new file mode 100644
index 0000000000..6d99d3f08e
--- /dev/null
+++ b/src/backends/aclCommon/ArmComputeTuningUtils.hpp
@@ -0,0 +1,84 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/BackendOptions.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/Logging.hpp>
+
+#include <arm_compute/runtime/CL/CLTuner.h>
+#include <arm_compute/runtime/CL/CLTunerTypes.h>
+#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h>
+
+namespace armnn
+{
+
+enum class TuningLevel
+{
+    None,
+    Rapid,
+    Normal,
+    Exhaustive
+};
+
+inline TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defaultValue)
+{
+    if (value.IsInt())
+    {
+        int v = value.AsInt();
+        if (v > static_cast<int>(TuningLevel::Exhaustive) ||
+            v < static_cast<int>(TuningLevel::None))
+        {
+            ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
+                                  "Using default(" << static_cast<int>(defaultValue) << ")";
+        } else
+        {
+            return static_cast<TuningLevel>(v);
+        }
+    }
+    return defaultValue;
+}
+
+inline void ConfigureTuner(arm_compute::CLTuner &tuner, TuningLevel level)
+{
+    tuner.set_tune_new_kernels(true); // Turn on tuning initially.
+
+    switch (level)
+    {
+        case TuningLevel::Rapid:
+            ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
+            tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
+            break;
+        case TuningLevel::Normal:
+            ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
+            tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
+            break;
+        case TuningLevel::Exhaustive:
+            ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
+            tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
+            break;
+        case TuningLevel::None:
+        default:
+            tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
+            break;
+    }
+}
+
+class ClTunedParameters : public IGpuAccTunedParameters
+{
+public:
+    ClTunedParameters(IGpuAccTunedParameters::Mode mode, IGpuAccTunedParameters::TuningLevel tuningLevel);
+
+    virtual void Load(const char* filename);
+    virtual void Save(const char* filename) const;
+
+    Mode m_Mode;
+    TuningLevel m_TuningLevel;
+
+    arm_compute::CLTuner m_Tuner;
+    arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle;
+};
+
+}
\ No newline at end of file
```

```diff
diff --git a/src/backends/aclCommon/CMakeLists.txt b/src/backends/aclCommon/CMakeLists.txt
index 05fbe6cca9..b3bf89e750 100644
--- a/src/backends/aclCommon/CMakeLists.txt
+++ b/src/backends/aclCommon/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017 Arm Ltd. All rights reserved.
+# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
@@ -8,9 +8,12 @@ list(APPEND armnnAclCommon_sources
     ArmComputeTensorHandle.hpp
     ArmComputeTensorUtils.hpp
     ArmComputeTensorUtils.cpp
+    ArmComputeTuningUtils.hpp
+    ArmComputeTuningUtils.cpp
     ArmComputeUtils.hpp
     BaseMemoryManager.cpp
     BaseMemoryManager.hpp
+    IClTensorHandle.hpp
    )
 
 if(BUILD_UNIT_TESTS)
```

```diff
diff --git a/src/backends/cl/IClTensorHandle.hpp b/src/backends/aclCommon/IClTensorHandle.hpp
index 48cf5f57d6..48cf5f57d6 100644
--- a/src/backends/cl/IClTensorHandle.hpp
+++ b/src/backends/aclCommon/IClTensorHandle.hpp
```

```diff
diff --git a/src/backends/aclCommon/common.mk b/src/backends/aclCommon/common.mk
index 0ba966af14..b113269df9 100644
--- a/src/backends/aclCommon/common.mk
+++ b/src/backends/aclCommon/common.mk
@@ -9,6 +9,7 @@
 COMMON_SOURCES := \
     ArmComputeTensorUtils.cpp \
+    ArmComputeTuningUtils.cpp \
     BaseMemoryManager.cpp
 
 # COMMON_TEST_SOURCES contains the list of files to be included
```

```diff
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index b2ab932384..8d7e114fa5 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -1,9 +1,9 @@
 #
-# Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
 
-if(NOT BUILD_BARE_METAL)
+if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC)
 list(APPEND armnnBackendsCommon_sources
     DynamicBackend.cpp
     DynamicBackend.hpp
```

```diff
diff --git a/src/backends/backendsCommon/test/BackendProfilingTests.cpp b/src/backends/backendsCommon/test/BackendProfilingTests.cpp
index d49fa7f2ec..9041b55c57 100644
--- a/src/backends/backendsCommon/test/BackendProfilingTests.cpp
+++ b/src/backends/backendsCommon/test/BackendProfilingTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020, 2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -169,11 +169,6 @@ TEST_CASE("BackendProfilingCounterRegisterMockBackendTest")
     unsigned int shiftedId = 0;
 
-    if (armnn::BackendRegistryInstance().IsBackendRegistered("EthosNAcc"))
-    {
-        shiftedId = 4;
-    }
-
     // Check if the MockBackends 3 dummy counters {0, 1, 2-5 (four cores)} are registered
     armnn::BackendId mockId = armnn::MockBackendId();
     const ICounterMappings& counterMap = GetProfilingService(&runtime).GetCounterMappings();
```
```diff
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 5fcc8b592e..d251bd2597 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017-2022 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
@@ -41,6 +41,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources
     LogSoftmaxEndToEndTestImpl.hpp
     MemoryManagerTests.cpp
     MockBackendId.hpp
+    MultiplicationEndToEndTestImpl.hpp
     OptimizeSubgraphViewTests.cpp
     OptimizationViewsTests.cpp
     PreluEndToEndTestImpl.hpp
@@ -57,6 +58,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources
     SpaceToDepthEndToEndTestImpl.hpp
     SplitterEndToEndTestImpl.hpp
     StridedSliceAsyncEndToEndTest.hpp
+    SubtractionEndToEndTestImpl.hpp
     TransposeEndToEndTestImpl.hpp
     TensorCopyUtils.hpp
     WorkloadFactoryHelper.hpp
```
```diff
diff --git a/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp
new file mode 100644
index 0000000000..40442e2d47
--- /dev/null
+++ b/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/INetwork.hpp>
+
+#include <CommonTestUtils.hpp>
+#include <ResolveType.hpp>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+template<typename armnn::DataType DataType>
+armnn::INetworkPtr CreateMultiplicationNetwork(const armnn::TensorShape& inputXShape,
+                                               const armnn::TensorShape& inputYShape,
+                                               const armnn::TensorShape& outputShape,
+                                               const float qScale = 1.0f,
+                                               const int32_t qOffset = 0)
+{
+    using namespace armnn;
+
+    INetworkPtr network(INetwork::Create());
+
+    TensorInfo inputXTensorInfo(inputXShape, DataType, qScale, qOffset, true);
+    TensorInfo inputYTensorInfo(inputYShape, DataType, qScale, qOffset, true);
+
+    TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset);
+
+
+    IConnectableLayer* multiplication = network->AddMultiplicationLayer("multiplication");
+    IConnectableLayer* inputX = network->AddInputLayer(0, "inputX");
+    IConnectableLayer* inputY = network->AddInputLayer(1, "inputY");
+    IConnectableLayer* output = network->AddOutputLayer(0, "output");
+
+    Connect(inputX, multiplication, inputXTensorInfo, 0, 0);
+    Connect(inputY, multiplication, inputYTensorInfo, 0, 1);
+    Connect(multiplication, output, outputTensorInfo, 0, 0);
+
+    return network;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void MultiplicationEndToEnd(const std::vector<armnn::BackendId>& backends)
+{
+    using namespace armnn;
+
+    const TensorShape& inputXShape = { 2, 2 };
+    const TensorShape& inputYShape = { 2, 2 };
+    const TensorShape& outputShape = { 2, 2 };
+
+    INetworkPtr network = CreateMultiplicationNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+
+    CHECK(network);
+
+    std::vector<T> inputXData{ 1, 2, 3, 4 };
+    std::vector<T> inputYData{ 5, 2, 6, 3 };
+    std::vector<T> expectedOutput{ 5, 4, 18, 12 };
+
+    std::map<int, std::vector<T>> inputTensorData = {{ 0, inputXData }, {1, inputYData}};
+    std::map<int, std::vector<T>> expectedOutputData = { { 0, expectedOutput } };
+
+    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+template<armnn::DataType ArmnnType>
+void MultiplicationEndToEndFloat16(const std::vector<armnn::BackendId>& backends)
+{
+    using namespace armnn;
+    using namespace half_float::literal;
+    using Half = half_float::half;
+
+    const TensorShape& inputXShape = { 2, 2 };
+    const TensorShape& inputYShape = { 2, 2 };
+    const TensorShape& outputShape = { 2, 2 };
+
+    INetworkPtr network = CreateMultiplicationNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+    CHECK(network);
+
+    std::vector<Half> inputXData{ 1._h, 2._h,
+                                  3._h, 4._h };
+    std::vector<Half> inputYData{ 1._h, 2._h,
+                                  3._h, 4._h };
+    std::vector<Half> expectedOutput{ 1._h, 4._h,
+                                      9._h, 16._h };
+
+    std::map<int, std::vector<Half>> inputTensorData = {{ 0, inputXData }, { 1, inputYData }};
+    std::map<int, std::vector<Half>> expectedOutputData = { { 0, expectedOutput } };
+
+    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+} // anonymous namespace
```
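These helpers are header-only templates; a backend's end-to-end suite is expected to instantiate them per data type and backend, along the lines of this hypothetical CpuRef test case:

```cpp
// Hypothetical registration in a backend's end-to-end test file.
TEST_CASE("RefMultiplicationEndToEndFloat32Test")
{
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
    MultiplicationEndToEnd<armnn::DataType::Float32>(backends);
}
```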
```diff
diff --git a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
index 9b86784dce..ff84eea2de 100644
--- a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
+++ b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2019-2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -288,4 +288,15 @@ TEST_CASE("OptimizeViewsValidateDeviceMockBackend")
     CheckLayers(graph);
 }
 
+TEST_CASE("OptimizedViewsReturnsINetworkReference")
+{
+    OptimizationViews view;
+
+    auto layer = view.GetINetworkRef().AddInputLayer(0, "input");
+
+    // Check layer has been added to the referenced INetwork
+    CHECK(layer);
+}
+
+
 }
\ No newline at end of file
```

```diff
diff --git a/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp
new file mode 100644
index 0000000000..747fe26df0
--- /dev/null
+++ b/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/INetwork.hpp>
+
+#include <CommonTestUtils.hpp>
+#include <ResolveType.hpp>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+template<typename armnn::DataType DataType>
+armnn::INetworkPtr CreateSubtractionNetwork(const armnn::TensorShape& inputXShape,
+                                            const armnn::TensorShape& inputYShape,
+                                            const armnn::TensorShape& outputShape,
+                                            const float qScale = 1.0f,
+                                            const int32_t qOffset = 0)
+{
+    using namespace armnn;
+
+    INetworkPtr network(INetwork::Create());
+
+    TensorInfo inputXTensorInfo(inputXShape, DataType, qScale, qOffset, true);
+    TensorInfo inputYTensorInfo(inputYShape, DataType, qScale, qOffset, true);
+
+    TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset);
+
+
+    IConnectableLayer* subtraction = network->AddSubtractionLayer("subtraction");
+    IConnectableLayer* inputX = network->AddInputLayer(0, "inputX");
+    IConnectableLayer* inputY = network->AddInputLayer(1, "inputY");
+    IConnectableLayer* output = network->AddOutputLayer(0, "output");
+
+    Connect(inputX, subtraction, inputXTensorInfo, 0, 0);
+    Connect(inputY, subtraction, inputYTensorInfo, 0, 1);
+    Connect(subtraction, output, outputTensorInfo, 0, 0);
+
+    return network;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void SubtractionEndToEnd(const std::vector<armnn::BackendId>& backends)
+{
+    using namespace armnn;
+
+    const TensorShape& inputXShape = { 2, 2 };
+    const TensorShape& inputYShape = { 2, 2 };
+    const TensorShape& outputShape = { 2, 2 };
+
+    INetworkPtr network = CreateSubtractionNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+
+    CHECK(network);
+
+    std::vector<T> inputXData{ 10, 11, 12, 13 };
+    std::vector<T> inputYData{ 5, 7, 6, 8 };
+    std::vector<T> expectedOutput{ 5, 4, 6, 5 };
+
+    std::map<int, std::vector<T>> inputTensorData = {{ 0, inputXData }, {1, inputYData}};
+    std::map<int, std::vector<T>> expectedOutputData = { { 0, expectedOutput } };
+
+    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+template<armnn::DataType ArmnnType>
+void SubtractionEndToEndFloat16(const std::vector<armnn::BackendId>& backends)
+{
+    using namespace armnn;
+    using namespace half_float::literal;
+    using Half = half_float::half;
+
+    const TensorShape& inputXShape = { 2, 2 };
+    const TensorShape& inputYShape = { 2, 2 };
+    const TensorShape& outputShape = { 2, 2 };
+
+    INetworkPtr network = CreateSubtractionNetwork<ArmnnType>(inputXShape, inputYShape, outputShape);
+    CHECK(network);
+
+    std::vector<Half> inputXData{ 11._h, 12._h,
+                                  13._h, 14._h };
+    std::vector<Half> inputYData{ 5._h, 7._h,
+                                  6._h, 8._h };
+    std::vector<Half> expectedOutput{ 6._h, 5._h,
+                                      7._h, 6._h };
+
+    std::map<int, std::vector<Half>> inputTensorData = {{ 0, inputXData }, { 1, inputYData }};
+    std::map<int, std::vector<Half>> expectedOutputData = { { 0, expectedOutput } };
+
+    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network), inputTensorData, expectedOutputData, backends);
+}
+
+} // anonymous namespace
```
b/src/backends/cl/CMakeLists.txt index aeb90b069c..20c42061fc 100644 --- a/src/backends/cl/CMakeLists.txt +++ b/src/backends/cl/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -44,7 +44,6 @@ if(ARMCOMPUTECL) ClTensorHandleFactory.hpp ClWorkloadFactory.cpp ClWorkloadFactory.hpp - IClTensorHandle.hpp ICLTensorProxy.hpp OpenClTimer.cpp OpenClTimer.hpp diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp index 62c6b038da..adee2763ba 100644 --- a/src/backends/cl/ClBackendContext.cpp +++ b/src/backends/cl/ClBackendContext.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -20,20 +20,11 @@ namespace armnn struct ClBackendContext::ClContextControlWrapper { - ClContextControlWrapper() {} - - bool IsInitialised() - { - return m_Initialised; - } - - void Init(arm_compute::CLTuner* tuner, - arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, - bool profilingEnabled) - { - m_ClContextControl = ClContextControl(tuner, heuristicsHandle, profilingEnabled); - m_Initialised = true; - } + ClContextControlWrapper(arm_compute::CLTuner* tuner, + arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, + bool profilingEnabled) + : m_ClContextControl(tuner, heuristicsHandle, profilingEnabled) + {} bool Sync() { @@ -62,106 +53,12 @@ struct ClBackendContext::ClContextControlWrapper { // There are no loaded networks left, so clear the CL cache to free up memory m_ClContextControl.ClearClCache(); - m_Initialised = false; } } -private: - bool m_Initialised; ClContextControl m_ClContextControl; - }; -/** - * Returns a shared_ptr to the CLContextControlWrapper. This wraps the CLContextControl and ensures that we only create - * and use one at a time. - */ -std::shared_ptr<ClBackendContext::ClContextControlWrapper> ClBackendContext::Get() -{ - static std::shared_ptr<ClBackendContext::ClContextControlWrapper> instance - = std::make_shared<ClBackendContext::ClContextControlWrapper>(); - // Instantiated on first use. - return instance; -} - -std::string LowerString(std::string value) -{ - std::transform(value.begin(), value.end(), value.begin(), - [](unsigned char c){ return std::tolower(c); }); - - return value; -} - -enum class TuningLevel -{ - None, - Rapid, - Normal, - Exhaustive -}; - - -TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defaultValue) -{ - if (value.IsInt()) - { - int v = value.AsInt(); - if (v > static_cast<int>(TuningLevel::Exhaustive) || - v < static_cast<int>(TuningLevel::None)) - { - ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. " - "Using default(" << static_cast<int>(defaultValue) << ")"; - } else - { - return static_cast<TuningLevel>(v); - } - } - return defaultValue; -} - -bool ParseBoolean(const BackendOptions::Var& value, bool defaultValue) -{ - if (value.IsBool()) - { - return value.AsBool(); - } - return defaultValue; -} - -std::string ParseFile(const BackendOptions::Var& value, std::string defaultValue) -{ - if (value.IsString()) - { - return value.AsString(); - } - return defaultValue; -} - -void ConfigureTuner(arm_compute::CLTuner &tuner, TuningLevel level) -{ - tuner.set_tune_new_kernels(true); // Turn on tuning initially. - - switch (level) - { - case TuningLevel::Rapid: - ARMNN_LOG(info) << "Gpu tuning is activated. 
TuningLevel: Rapid (1)"; - tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID); - break; - case TuningLevel::Normal: - ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)"; - tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL); - break; - case TuningLevel::Exhaustive: - ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)"; - tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE); - break; - case TuningLevel::None: - default: - tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode. - break; - } -} - ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) : IBackendContext(options) , m_TuningFile() @@ -171,7 +68,6 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) arm_compute::CLTuner* tuner = nullptr; arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr; bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr; - if (useLegacyTunerAPI) { auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>( @@ -217,17 +113,17 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) { if (name == "KernelProfilingEnabled") { - kernelProfiling |= ParseBoolean(value, false); + kernelProfiling |= ParseBooleanBackendOption(value, false); } else if (name == "TuningFile") { - m_TuningFile = ParseFile(value, ""); + m_TuningFile = ParseStringBackendOption(value, ""); } else if (name == "TuningLevel") { tuningLevel = ParseTuningLevel(value, defaultTuningLevel); } else if (name == "MLGOTuningFilePath") { - m_MLGOTuningFile = ParseFile(value, ""); + m_MLGOTuningFile = ParseStringBackendOption(value, ""); } }); @@ -272,12 +168,11 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) tuner = m_Tuner.get(); } - m_ClContextControlWrapper = Get(); - - if (!m_ClContextControlWrapper->IsInitialised()) - { - m_ClContextControlWrapper->Init(tuner, mlgoTuner, kernelProfiling); - } + m_ClContextControlWrapper = std::make_unique<ClContextControlWrapper>( + tuner, + mlgoTuner, + kernelProfiling + ); } bool ClBackendContext::BeforeLoadNetwork(NetworkId) diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp index 276067727b..659d47b7c2 100644 --- a/src/backends/cl/ClBackendContext.hpp +++ b/src/backends/cl/ClBackendContext.hpp @@ -31,11 +31,8 @@ public: private: std::mutex m_Mutex; - struct ClContextControlWrapper; - static std::shared_ptr<ClBackendContext::ClContextControlWrapper> Get(); - - std::shared_ptr<ClBackendContext::ClContextControlWrapper> m_ClContextControlWrapper; + std::unique_ptr<ClContextControlWrapper> m_ClContextControlWrapper; std::unordered_set<NetworkId> m_NetworkIds; diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp index fd2d0f53eb..34eca961b4 100644 --- a/src/backends/cl/ClContextControl.cpp +++ b/src/backends/cl/ClContextControl.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -166,55 +166,4 @@ void ClContextControl::ClearClCache() DoLoadOpenClRuntime(true); } -armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) -{ - return new ClTunedParameters(mode, tuningLevel); -} - -armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) -{ - return IGpuAccTunedParametersPtr(CreateRaw(mode, tuningLevel), &IGpuAccTunedParameters::Destroy); -} - -void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) -{ - delete params; -} - -ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) - : m_Mode(mode) - , m_TuningLevel(tuningLevel) - , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) -{ -} - -void ClTunedParameters::Load(const char* filename) -{ - try - { - m_Tuner.load_from_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + - e.what()); - } -} - -void ClTunedParameters::Save(const char* filename) const -{ - try - { - m_Tuner.save_to_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + - e.what()); - } -} - } // namespace armnn diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp index 4a640cdf22..7520d102a5 100644 --- a/src/backends/cl/ClContextControl.hpp +++ b/src/backends/cl/ClContextControl.hpp @@ -1,13 +1,10 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once -#include "armnn/IRuntime.hpp" - -#include <arm_compute/runtime/CL/CLTuner.h> -#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h> +#include <aclCommon/ArmComputeTuningUtils.hpp> namespace armnn { @@ -42,19 +39,4 @@ private: bool m_ProfilingEnabled; }; -class ClTunedParameters : public IGpuAccTunedParameters -{ -public: - ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode, armnn::IGpuAccTunedParameters::TuningLevel tuningLevel); - - virtual void Load(const char* filename); - virtual void Save(const char* filename) const; - - Mode m_Mode; - TuningLevel m_TuningLevel; - - arm_compute::CLTuner m_Tuner; - arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle; -}; - } // namespace armnn diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp index 889a2ad5f3..a03a4e9ea6 100644 --- a/src/backends/cl/ClImportTensorHandle.hpp +++ b/src/backends/cl/ClImportTensorHandle.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -19,7 +19,7 @@ #include <arm_compute/core/TensorShape.h> #include <arm_compute/core/Coordinates.h> -#include <cl/IClTensorHandle.hpp> +#include <aclCommon/IClTensorHandle.hpp> #include <CL/cl_ext.h> #include <arm_compute/core/CL/CLKernelLibrary.h> diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index a61a5bb640..cb2d756037 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -22,6 +22,7 @@ #include "workloads/ClAdditionWorkload.hpp" #include "workloads/ClActivationWorkload.hpp" #include "workloads/ClArgMinMaxWorkload.hpp" +#include "workloads/ClBatchMatMulWorkload.hpp" #include "workloads/ClBatchNormalizationFloatWorkload.hpp" #include "workloads/ClBatchToSpaceNdWorkload.hpp" #include "workloads/ClCastWorkload.hpp" @@ -201,6 +202,12 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type, infos[1], *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)), reasonIfUnsupported); + case LayerType::BatchMatMul: + return IsBatchMatMulSupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::BatchNormalization: return IsBatchNormalizationSupported(infos[0], infos[1], @@ -640,6 +647,20 @@ bool ClLayerSupport::IsArgMinMaxSupported(const TensorInfo& input, descriptor); } +bool ClLayerSupport::IsBatchMatMulSupported(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchMatMulValidate, + reasonIfUnsupported, + inputX, + inputY, + output, + descriptor); +} + bool ClLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input, const TensorInfo& output, const TensorInfo& mean, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index 27311f74aa..2d784e3df8 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -40,6 +40,12 @@ public: const ArgMinMaxDescriptor& descriptor, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsBatchMatMulSupported(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsBatchNormalizationSupported(const TensorInfo& input, const TensorInfo& output, const TensorInfo& mean, diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp index f63f1faa07..3d750f9059 100644 --- a/src/backends/cl/ClTensorHandle.hpp +++ b/src/backends/cl/ClTensorHandle.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #pragma once @@ -18,7 +18,7 @@ #include <arm_compute/core/TensorShape.h> #include <arm_compute/core/Coordinates.h> -#include <cl/IClTensorHandle.hpp> +#include <aclCommon/IClTensorHandle.hpp> namespace armnn { diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp index b8ee57f0bf..82e41d3ff6 100644 --- a/src/backends/cl/ClTensorHandleFactory.cpp +++ b/src/backends/cl/ClTensorHandleFactory.cpp @@ -108,12 +108,12 @@ bool ClTensorHandleFactory::SupportsSubTensors() const MemorySourceFlags ClTensorHandleFactory::GetExportFlags() const { - return m_ExportFlags; + return MemorySourceFlags(MemorySource::Undefined); } MemorySourceFlags ClTensorHandleFactory::GetImportFlags() const { - return m_ImportFlags; + return MemorySourceFlags(MemorySource::Undefined); } -} // namespace armnn
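Since both getters above now hard-code MemorySource::Undefined, the CL handle factory no longer advertises zero-copy import or export, and the runtime falls back to the CopyInput/CopyOutput paths. A minimal sketch of how a caller observes this (the factory variable is assumed; CheckFlag is, to the best of my knowledge, armnn's flag helper from MemorySources.hpp, otherwise a bitwise AND against the flag bits does the same):
// Expected to be false after this change, so user buffers are memcpy'd into CL memory
// instead of being imported directly.
bool canImportMalloc = armnn::CheckFlag(factory.GetImportFlags(), armnn::MemorySource::Malloc);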
\ No newline at end of file +} // namespace armnn diff --git a/src/backends/cl/ClTensorHandleFactory.hpp b/src/backends/cl/ClTensorHandleFactory.hpp index 3acab0bce7..8e1c7a8a02 100644 --- a/src/backends/cl/ClTensorHandleFactory.hpp +++ b/src/backends/cl/ClTensorHandleFactory.hpp @@ -24,8 +24,6 @@ public: ClTensorHandleFactory(std::shared_ptr<ClMemoryManager> mgr) : m_MemoryManager(mgr) - , m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)) - , m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)) {} std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, @@ -56,8 +54,6 @@ public: private: mutable std::shared_ptr<ClMemoryManager> m_MemoryManager; - MemorySourceFlags m_ImportFlags; - MemorySourceFlags m_ExportFlags; }; -} // namespace armnn
\ No newline at end of file +} // namespace armnn diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index d0079abd38..6bf510a2ef 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -265,6 +265,11 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type, auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor); return MakeWorkload<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info, m_CLCompileContext); } + case LayerType::BatchMatMul : + { + auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor); + return std::make_unique<ClBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info, m_CLCompileContext); + } case LayerType::BatchNormalization : { auto batchNormalizationQueueDescriptor diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 6fda16db05..1f97ae7cc8 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -30,6 +30,7 @@ BACKEND_SOURCES := \ workloads/ClActivationWorkload.cpp \ workloads/ClAdditionWorkload.cpp \ workloads/ClArgMinMaxWorkload.cpp \ + workloads/ClBatchMatMulWorkload.cpp \ workloads/ClBatchNormalizationFloatWorkload.cpp \ workloads/ClBatchToSpaceNdWorkload.cpp \ workloads/ClCastWorkload.cpp \ diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt index ec1d0a6c2f..6568d48ce5 100644 --- a/src/backends/cl/test/CMakeLists.txt +++ b/src/backends/cl/test/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -8,6 +8,7 @@ list(APPEND armnnClBackendUnitTests_sources ClContextControlFixture.hpp ClContextSerializerTests.cpp ClCustomAllocatorTests.cpp + ClDefaultAllocatorTests.cpp ClCreateWorkloadTests.cpp ClEndToEndTests.cpp ClImportTensorHandleFactoryTests.cpp @@ -18,7 +19,6 @@ list(APPEND armnnClBackendUnitTests_sources ClOptimizedNetworkTests.cpp ClRuntimeTests.cpp ClWorkloadFactoryHelper.hpp - DefaultAllocatorTests.cpp Fp16SupportTest.cpp ICLTensorProxyTests.cpp OpenClTimerTest.cpp diff --git a/src/backends/cl/test/DefaultAllocatorTests.cpp b/src/backends/cl/test/ClDefaultAllocatorTests.cpp index eaa30c8800..411a480815 100644 --- a/src/backends/cl/test/DefaultAllocatorTests.cpp +++ b/src/backends/cl/test/ClDefaultAllocatorTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 855697c9be..4ba2a9ec3b 100644 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -73,6 +73,29 @@ ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tanh, ClContextControlFixture, TanhTest) // Elu Activation ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Elu, ClContextControlFixture, EluTest) +// Batch Mat Mul +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DSimpleFloat32, + ClContextControlFixture, + BatchMatMul2DSimpleTest<DataType::Float32>); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DSimpleFloat32, + ClContextControlFixture, + BatchMatMul3DSimpleTest<DataType::Float32>); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DBatchFloat32, + ClContextControlFixture, + BatchMatMul3DBatchTest<DataType::Float32>); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DBroadcastFloat32, + ClContextControlFixture, + BatchMatMul3DBroadcastTest<DataType::Float32>); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3D2DBroadcastFloat32, + ClContextControlFixture, + BatchMatMul3D2DBroadcastTest<DataType::Float32>); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DTinyFloat32, + ClContextControlFixture, + BatchMatMul2DTinyTest<DataType::Float32>); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DTranspSimpleFloat32, + ClContextControlFixture, + BatchMatMul2DTranspSimpleTest<DataType::Float32>); + // Batch To Space ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchToSpaceNdNhwcFloat321, ClContextControlFixture, diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt index aef7fc7ad2..8616dec078 100644 --- a/src/backends/cl/workloads/CMakeLists.txt +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -12,6 +12,8 @@ list(APPEND armnnClBackendWorkloads_sources ClAdditionWorkload.hpp ClArgMinMaxWorkload.cpp ClArgMinMaxWorkload.hpp + ClBatchMatMulWorkload.cpp + ClBatchMatMulWorkload.hpp ClBatchNormalizationFloatWorkload.cpp ClBatchNormalizationFloatWorkload.hpp ClBatchToSpaceNdWorkload.cpp diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp new file mode 100644 index 0000000000..4acdef5e5c --- /dev/null +++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp @@ -0,0 +1,203 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClBatchMatMulWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +#include <aclCommon/ArmComputeTensorUtils.hpp> +#include <aclCommon/ArmComputeUtils.hpp> + +#include <armnn/utility/PolymorphicDowncast.hpp> + +#include <armnnUtils/Permute.hpp> + +#include <backendsCommon/WorkloadUtils.hpp> + +#include <cl/ClTensorHandle.hpp> + +#include <arm_compute/runtime/CL/functions/CLGEMM.h> +#include <arm_compute/runtime/CL/functions/CLPermute.h> + + +namespace armnn +{ +arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor) +{ + if (descriptor.m_AdjointX || descriptor.m_AdjointY) + { + throw Exception("Support for adjoint not implemented."); + } + if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW) + { + throw Exception("Only MatMul in the last 2 dimensions is supported"); + } + + arm_compute::Status statusGEMM = arm_compute::Status(arm_compute::ErrorCode::OK); + arm_compute::Status statusPermuteX = arm_compute::Status(arm_compute::ErrorCode::OK); + arm_compute::Status statusPermuteY = arm_compute::Status(arm_compute::ErrorCode::OK); + + const auto aclInputXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputX, descriptor.m_DataLayoutX); + const auto aclInputYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputY, descriptor.m_DataLayoutY); + const auto aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo(); + arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo(); + + if (descriptor.m_TransposeX == true) + { + auto permutationXVector = GeneratePermutationVectorOnLastTwoDimensions(inputX.GetNumDimensions()); + const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector); + const TensorInfo permutedXInfo = armnnUtils::Permuted(inputX, permutationXVector); + aclPermutedXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedXInfo); + + statusPermuteX = arm_compute::CLPermute::validate(&aclInputXInfo, + &aclPermutedXInfo, + aclPermutationXVector); + } + + if (descriptor.m_TransposeY == true) + { + auto permutationYVector = GeneratePermutationVectorOnLastTwoDimensions(inputY.GetNumDimensions()); + const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector); + const TensorInfo permutedYInfo = armnnUtils::Permuted(inputY, permutationYVector); + aclPermutedYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedYInfo); + + statusPermuteY = arm_compute::CLPermute::validate(&aclInputYInfo, + &aclPermutedYInfo, + aclPermutationYVector); + + } + + const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped + false, // is inputY reshaped + false); // is inputY reshaped only 1st run + + + statusGEMM = arm_compute::CLGEMM::validate(descriptor.m_TransposeX ? &aclPermutedXInfo : &aclInputXInfo, + descriptor.m_TransposeY ? &aclPermutedYInfo : &aclInputYInfo, + nullptr, + &aclOutputInfo, + 1.0, + 0, + gemm_info); + + if (statusPermuteX.error_code() == arm_compute::ErrorCode::OK && + statusPermuteY.error_code() == arm_compute::ErrorCode::OK && + statusGEMM.error_code() == arm_compute::ErrorCode::OK) + { + return arm_compute::Status(arm_compute::ErrorCode::OK, + "All Batch Mat Mul layers validate status OK."); + } + else + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "BatchMatMul layer validate status failed." + + statusGEMM.error_description() + + statusPermuteX.error_description() + + statusPermuteY.error_description()); + } + +} + +ClBatchMatMulWorkload::ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) + : ClBaseWorkload<BatchMatMulQueueDescriptor>(descriptor, info) +{ + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchMatMulWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + + if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY) + { + throw Exception("Support for adjoint not implemented."); + } + if (descriptor.m_Parameters.m_DataLayoutX != armnn::DataLayout::NCHW || + descriptor.m_Parameters.m_DataLayoutY != armnn::DataLayout::NCHW) + { + throw Exception("Only MatMul in the last 2 dimensions is supported"); + } + + m_Data.ValidateInputsOutputs("ClBatchMatMulWorkload", 2, 1); + + const arm_compute::ICLTensor& inputX = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + const arm_compute::ICLTensor& inputY = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + inputX.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutX)); + inputY.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutY)); + + arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo(); + arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo(); + + if (descriptor.m_Parameters.m_TransposeX == true) + { + armnn::PermutationVector permutationXVector + = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions()); + const TensorInfo permutedXInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationXVector); + const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector); + armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo); + armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorX); + + auto permuteLayerX = std::make_unique<arm_compute::CLPermute>(); + permuteLayerX->configure(clCompileContext, + &inputX, + &m_PermutedTensorX, + aclPermutationXVector); + m_PermuteLayerX.reset(permuteLayerX.release()); + } + + if (descriptor.m_Parameters.m_TransposeY == true) + { + armnn::PermutationVector permutationYVector + = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[1].GetNumDimensions()); + const TensorInfo permutedYInfo = armnnUtils::Permuted(info.m_InputTensorInfos[1], permutationYVector); + const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector); + armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo); +
armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorY); + + std::unique_ptr<arm_compute::CLPermute> permuteLayerY(new arm_compute::CLPermute()); + permuteLayerY->configure(clCompileContext, + &inputY, + &m_PermutedTensorY, + aclPermutationYVector); + m_PermuteLayerY.reset(permuteLayerY.release()); + } + + const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped + false, // is inputY reshaped + false); // is inputY reshaped only 1st run + auto gemmLayer = std::make_unique<arm_compute::CLGEMM>(); + gemmLayer->configure(clCompileContext, + descriptor.m_Parameters.m_TransposeX ? &m_PermutedTensorX : &inputX, + descriptor.m_Parameters.m_TransposeY ? &m_PermutedTensorY : &inputY, + nullptr, + &output, + 1.0, + 0, + gemm_info); + m_GEMMLayer.reset(gemmLayer.release()); +} + +void ClBatchMatMulWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchMatMulWorkload_Execute", this->GetGuid()); + if (m_PermuteLayerX) + { + m_PermuteLayerX->run(); + } + if (m_PermuteLayerY) + { + m_PermuteLayerY->run(); + } + m_GEMMLayer->run(); +} +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp new file mode 100644 index 0000000000..5277efc947 --- /dev/null +++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseWorkload.hpp" + +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/CL/CLTensor.h> +#include <memory> + +namespace armnn +{ + arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor); + + class ClBatchMatMulWorkload : public ClBaseWorkload<BatchMatMulQueueDescriptor> + { + public: + ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); + virtual void Execute() const override; + + private: + // ACL layers required to fully form a Batch Mat Mul layer. + std::unique_ptr<arm_compute::IFunction> m_GEMMLayer; + std::unique_ptr<arm_compute::IFunction> m_PermuteLayerX; + std::unique_ptr<arm_compute::IFunction> m_PermuteLayerY; + + // Additional CL arm_compute::Tensors. + // Required to perform permutations. + arm_compute::CLTensor m_PermutedTensorX; + arm_compute::CLTensor m_PermutedTensorY; + + }; +} //namespace armnn diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp index c3a79b7583..44f3798d7d 100644 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ b/src/backends/cl/workloads/ClWorkloads.hpp @@ -10,6 +10,7 @@ #include "ClArgMinMaxWorkload.hpp" #include "ClComparisonWorkload.hpp" #include "ClConstantWorkload.hpp" +#include "ClBatchMatMulWorkload.hpp" #include "ClBatchNormalizationFloatWorkload.hpp" #include "ClBatchToSpaceNdWorkload.hpp" #include "ClCastWorkload.hpp" diff --git a/src/backends/dynamic/reference/CMakeLists.txt b/src/backends/dynamic/reference/CMakeLists.txt index de46f7a5cb..fe875282f5 100644 --- a/src/backends/dynamic/reference/CMakeLists.txt +++ b/src/backends/dynamic/reference/CMakeLists.txt @@ -1,9 +1,9 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017, 2023 Arm Ltd. All rights reserved. 
# SPDX-License-Identifier: MIT # -if(NOT BUILD_BARE_METAL) +if((NOT BUILD_BARE_METAL) AND (NOT EXECUTE_NETWORK_STATIC)) # File needed to wrap the existing backend into a dynamic one list(APPEND armnnRefDynamicBackend_sources @@ -33,5 +33,5 @@ target_include_directories(Arm_CpuRef_backend PRIVATE ${PROJECT_SOURCE_DIR}/prof set_target_properties(Arm_CpuRef_backend PROPERTIES PREFIX "") target_link_libraries(Arm_CpuRef_backend armnn) -# BUILD_BARE_METAL +# BUILD_BARE_METAL && EXECUTE_NETWORK_STATIC endif() diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index 4c97855668..ee155a2c64 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -90,6 +90,19 @@ namespace armnn namespace { +const TensorInfo OverrideDataType(const TensorInfo& info, Optional<DataType> type) +{ + if (!type) + { + return info; + } + return TensorInfo(info.GetShape(), + type.value(), + info.GetQuantizationScale(), + info.GetQuantizationOffset(), + info.IsConstant()); +} + template< typename ... Args> bool IsNeonBackendSupported(Optional<std::string&> reasonIfUnsupported, Args... args) { @@ -151,61 +164,64 @@ NeonLayerSupport::NeonLayerSupport() { } -bool NeonLayerSupport::IsLayerSupported(const LayerType& type, - const std::vector<TensorInfo>& infos, - const BaseDescriptor& descriptor, - const Optional<LstmInputParamsInfo>& lstmParamsInfo, - const Optional<QuantizedLstmInputParamsInfo>& quantizedLstmParamsInfo, - Optional<std::string&> reasonIfUnsupported) const +bool IsLayerTypeSupported(const LayerType& type, + const std::vector<TensorInfo>& infos, + const BaseDescriptor& descriptor, + const Optional<LstmInputParamsInfo>& lstmParamsInfo, + const Optional<QuantizedLstmInputParamsInfo>& quantizedLstmParamsInfo, + Optional<std::string&> reasonIfUnsupported, + const NeonLayerSupport& support) { switch (type) { case LayerType::Activation: - return IsActivationSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const ActivationDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsActivationSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const ActivationDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Addition: - return IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::ArgMinMax: - return IsArgMinMaxSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsArgMinMaxSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const ArgMinMaxDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::BatchMatMul: - return IsBatchMatMulSupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsBatchMatMulSupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast<const BatchMatMulDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::BatchNormalization: - return IsBatchNormalizationSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - *(PolymorphicDowncast<const BatchNormalizationDescriptor*> - (&descriptor)), - reasonIfUnsupported); + return support.IsBatchNormalizationSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + *(PolymorphicDowncast<const + BatchNormalizationDescriptor*>(&descriptor)), + 
reasonIfUnsupported); case LayerType::BatchToSpaceNd: - return IsBatchToSpaceNdSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const BatchToSpaceNdDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsBatchToSpaceNdSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const + BatchToSpaceNdDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Cast: - return IsCastSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsCastSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::ChannelShuffle: - return IsChannelShuffleSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const ChannelShuffleDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsChannelShuffleSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const + ChannelShuffleDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Comparison: - return IsComparisonSupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast<const ComparisonDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsComparisonSupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast<const ComparisonDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Concat: { std::vector<const TensorInfo*> inputInfos; @@ -213,17 +229,17 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, { inputInfos.push_back(&infos[i]); } - return IsConcatSupported(inputInfos, - infos[infos.size() - 1], - *(PolymorphicDowncast<const OriginsDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsConcatSupported(inputInfos, + infos[infos.size() - 1], + *(PolymorphicDowncast<const OriginsDescriptor*>(&descriptor)), + reasonIfUnsupported); } case LayerType::Constant: - return IsConstantSupported(infos[0], reasonIfUnsupported); + return support.IsConstantSupported(infos[0], reasonIfUnsupported); case LayerType::ConvertFp16ToFp32: - return IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported); + return support.IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported); case LayerType::ConvertFp32ToFp16: - return IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported); + return support.IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported); case LayerType::Convolution2d: { if (infos.size() != 4) @@ -235,21 +251,21 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast<const Convolution2dDescriptor*>(&descriptor)); if (infos[3] == TensorInfo()) { - return IsConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return IsConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::Convolution3d: @@ -263,28 +279,28 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast<const Convolution3dDescriptor*>(&descriptor)); if (infos[3] == TensorInfo()) { - return IsConvolution3dSupported(infos[0], - infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsConvolution3dSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return 
IsConvolution3dSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsConvolution3dSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::DepthToSpace: - return IsDepthToSpaceSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const DepthToSpaceDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsDepthToSpaceSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const DepthToSpaceDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::DepthwiseConvolution2d: { if (infos.size() != 4) @@ -296,217 +312,223 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&descriptor)); if (infos[3] == TensorInfo()) { - return IsDepthwiseConvolutionSupported(infos[0], - infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsDepthwiseConvolutionSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return IsDepthwiseConvolutionSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsDepthwiseConvolutionSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::Dequantize: - return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::DetectionPostProcess: { auto desc = *(PolymorphicDowncast<const DetectionPostProcessDescriptor*>(&descriptor)); - return LayerSupportBase::IsDetectionPostProcessSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - infos[6], - desc, - reasonIfUnsupported); + return support.IsDetectionPostProcessSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + infos[6], + desc, + reasonIfUnsupported); } case LayerType::Division: - return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::ElementwiseUnary: - return IsElementwiseUnarySupported(infos[0], - infos[1], - *(PolymorphicDowncast<const ElementwiseUnaryDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsElementwiseUnarySupported(infos[0], + infos[1], + *(PolymorphicDowncast<const + ElementwiseUnaryDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Fill: - return IsFillSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const FillDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsFillSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const FillDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Floor: - return IsFloorSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsFloorSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::FullyConnected: - return IsFullyConnectedSupported(infos[0], + return support.IsFullyConnectedSupported(infos[0], + infos[1], + infos[2], + infos[3], + *(PolymorphicDowncast<const + FullyConnectedDescriptor*>(&descriptor)), + reasonIfUnsupported); + case LayerType::Gather: + return support.IsGatherSupported(infos[0], infos[1], infos[2], - infos[3], - *(PolymorphicDowncast<const FullyConnectedDescriptor*>(&descriptor)), + *(PolymorphicDowncast<const GatherDescriptor*>(&descriptor)), reasonIfUnsupported); - case 
LayerType::Gather: - return IsGatherSupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast<const GatherDescriptor*>(&descriptor)), - reasonIfUnsupported); case LayerType::GatherNd: - return IsGatherNdSupported(infos[0], - infos[1], - infos[2], - reasonIfUnsupported); + return support.IsGatherNdSupported(infos[0], + infos[1], + infos[2], + reasonIfUnsupported); case LayerType::Input: - return IsInputSupported(infos[0], reasonIfUnsupported); + return support.IsInputSupported(infos[0], reasonIfUnsupported); case LayerType::InstanceNormalization: - return IsInstanceNormalizationSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const InstanceNormalizationDescriptor*> - (&descriptor)), - reasonIfUnsupported); + return support.IsInstanceNormalizationSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const + InstanceNormalizationDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::L2Normalization: - return IsL2NormalizationSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const L2NormalizationDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsL2NormalizationSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const + L2NormalizationDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::LogicalBinary: - return IsLogicalBinarySupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast<const LogicalBinaryDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsLogicalBinarySupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast<const + LogicalBinaryDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::LogSoftmax: - return IsLogSoftmaxSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const LogSoftmaxDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsLogSoftmaxSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const LogSoftmaxDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Lstm: - return IsLstmSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - infos[6], - *(PolymorphicDowncast<const LstmDescriptor*>(&descriptor)), - lstmParamsInfo.value(), - reasonIfUnsupported); + return support.IsLstmSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + infos[6], + *(PolymorphicDowncast<const LstmDescriptor*>(&descriptor)), + lstmParamsInfo.value(), + reasonIfUnsupported); case LayerType::Map: return true; case LayerType::Maximum: - return IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Mean: - return IsMeanSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const MeanDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsMeanSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const MeanDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::MemCopy: - return LayerSupportBase::IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::MemImport: - return LayerSupportBase::IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::Merge: - return LayerSupportBase::IsMergeSupported(infos[0], + return support.IsMergeSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Minimum: - return 
IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Multiplication: - return IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Normalization: - return IsNormalizationSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const NormalizationDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsNormalizationSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const + NormalizationDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Output: - return IsOutputSupported(infos[0], reasonIfUnsupported); + return support.IsOutputSupported(infos[0], reasonIfUnsupported); case LayerType::Pad: - return IsPadSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const PadDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsPadSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const PadDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Permute: - return IsPermuteSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const PermuteDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsPermuteSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const PermuteDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Pooling2d: - return IsPooling2dSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsPooling2dSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const Pooling2dDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Pooling3d: - return IsPooling3dSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsPooling3dSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const Pooling3dDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Prelu: - return IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::QLstm: - return IsQLstmSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - *(PolymorphicDowncast<const QLstmDescriptor*>(&descriptor)), - lstmParamsInfo.value(), - reasonIfUnsupported); - case LayerType::Quantize: - return IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported); - case LayerType::QuantizedLstm: - return IsQuantizedLstmSupported(infos[0], + return support.IsQLstmSupported(infos[0], infos[1], infos[2], infos[3], infos[4], - quantizedLstmParamsInfo.value(), + infos[5], + *(PolymorphicDowncast<const QLstmDescriptor*>(&descriptor)), + lstmParamsInfo.value(), reasonIfUnsupported); + case LayerType::Quantize: + return support.IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported); + case LayerType::QuantizedLstm: + return support.IsQuantizedLstmSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + quantizedLstmParamsInfo.value(), + reasonIfUnsupported); case LayerType::Rank: return true; case LayerType::Reshape: - return IsReshapeSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const ReshapeDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsReshapeSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const 
ReshapeDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Resize: - return IsResizeSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsResizeSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Reduce: - return IsReduceSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsReduceSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Shape: - return LayerSupportBase::IsShapeSupported(infos[0], - infos[1], - reasonIfUnsupported); + return support.IsShapeSupported(infos[0], + infos[1], + reasonIfUnsupported); case LayerType::Slice: - return IsSliceSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const SliceDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsSliceSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const SliceDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Softmax: - return IsSoftmaxSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsSoftmaxSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::SpaceToBatchNd: - return IsSpaceToBatchNdSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const SpaceToBatchNdDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsSpaceToBatchNdSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const + SpaceToBatchNdDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::SpaceToDepth: - return IsSpaceToDepthSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const SpaceToDepthDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsSpaceToDepthSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const SpaceToDepthDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Splitter: { std::vector<TensorInfo> outputInfos; @@ -514,10 +536,10 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, { outputInfos.push_back(infos[i]); } - return IsSplitterSupported(infos[0], - {outputInfos.begin(), outputInfos.end()}, - *(PolymorphicDowncast<const ViewsDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsSplitterSupported(infos[0], + {outputInfos.begin(), outputInfos.end()}, + *(PolymorphicDowncast<const ViewsDescriptor*>(&descriptor)), + reasonIfUnsupported); } case LayerType::Stack: { @@ -526,23 +548,23 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, { inputInfos.push_back(&infos[i]); } - return IsStackSupported(inputInfos, - infos[infos.size() - 1], - *(PolymorphicDowncast<const StackDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsStackSupported(inputInfos, + infos[infos.size() - 1], + *(PolymorphicDowncast<const StackDescriptor*>(&descriptor)), + reasonIfUnsupported); } case LayerType::StridedSlice: - return IsStridedSliceSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const StridedSliceDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsStridedSliceSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const StridedSliceDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::Subtraction: - return 
IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Transpose: - return IsTransposeSupported(infos[0], - infos[1], - *(PolymorphicDowncast<const TransposeDescriptor*>(&descriptor)), - reasonIfUnsupported); + return support.IsTransposeSupported(infos[0], + infos[1], + *(PolymorphicDowncast<const TransposeDescriptor*>(&descriptor)), + reasonIfUnsupported); case LayerType::TransposeConvolution2d: { if (infos.size() != 4) { @@ -554,34 +576,36 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast<const TransposeConvolution2dDescriptor*>(&descriptor)); if (infos[3] == TensorInfo()) { - return IsTransposeConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsTransposeConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return IsTransposeConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsTransposeConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::UnidirectionalSequenceLstm: - return IsUnidirectionalSequenceLstmSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - *(PolymorphicDowncast<const - UnidirectionalSequenceLstmDescriptor*>(&descriptor)), - lstmParamsInfo.value(), - reasonIfUnsupported); + { + auto desc = *(PolymorphicDowncast<const UnidirectionalSequenceLstmDescriptor*>(&descriptor)); + return support.IsUnidirectionalSequenceLstmSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + desc, + lstmParamsInfo.value(), + reasonIfUnsupported); + } case LayerType::Unmap: return true; default: @@ -592,6 +616,54 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, } } +bool NeonLayerSupport::IsLayerSupported(const LayerType& type, + const std::vector<TensorInfo>& infos, + const BaseDescriptor& descriptor, + const Optional<LstmInputParamsInfo>& lstmParamsInfo, + const Optional<QuantizedLstmInputParamsInfo>& quantizedLstmParamsInfo, + Optional<std::string&> reasonIfUnsupported) const +{ + bool isSupported = IsLayerTypeSupported(type, + infos, + descriptor, + lstmParamsInfo, + quantizedLstmParamsInfo, + reasonIfUnsupported, + *this); + + // For android-nn-driver and support library, to run FP16 operations on CpuAcc we need at least v8.2 + // architecture. If the available architecture is older than v8.2, we can check if the operator is + // supported by changing operator inputs & outputs to be FP32. + // This does not change the operator datatype in the above parsers to be FP32. We are simply reporting + // to the parsers if the operator can be supported in ArmNN. We will then re-enter ArmNN (Network.cpp) + // where we will recheck IsLayerSupported() on the FP16 datatype, update the operator to be FP32, + // and insert convert layers around the FP32 operator.
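+ // Illustrative example (hypothetical layer, not from this change): an FP16 Activation queried on an + // Armv8.0 CPU fails with the "does not support F16" reason checked below; re-running the same query + // with FP32 TensorInfos reports whether the layer could run once those convert layers are in place.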
+ if (reasonIfUnsupported.has_value()) + { + std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above"; + if (!isSupported + && reasonIfUnsupported.value().find(checkStr) != std::string::npos) + { + std::vector<TensorInfo> newInfos; + for (auto info: infos) + { + newInfos.emplace_back(OverrideDataType(info, DataType::Float32)); + } + + std::string tmpString; + return IsLayerTypeSupported(type, + newInfos, + descriptor, + lstmParamsInfo, + quantizedLstmParamsInfo, + tmpString, + *this); + } + } + + return isSupported; +} + bool NeonLayerSupport::IsActivationSupported(const TensorInfo& input, const TensorInfo& output, const ActivationDescriptor& descriptor, diff --git a/src/backends/tosaCommon/TosaMappings.cpp b/src/backends/tosaCommon/TosaMappings.cpp index 1452e4aefd..0b5fa1a158 100644 --- a/src/backends/tosaCommon/TosaMappings.cpp +++ b/src/backends/tosaCommon/TosaMappings.cpp @@ -24,8 +24,10 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer, switch (type) { case LayerType::Addition: + case LayerType::Multiplication: + case LayerType::Subtraction: { - return ConvertAdditionToTosaOperator(layer, inputs, outputs); + return ConvertElementwiseBinaryToTosaOperator(layer, type, inputs, outputs); } case LayerType::Concat: { @@ -77,6 +79,11 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer, auto transposeConv2dDesc = PolymorphicDowncast<const TransposeConvolution2dDescriptor*>(&descriptor); return ConvertTransposeConv2dToTosaOperator(layer, inputs, outputs, transposeConv2dDesc); } + case LayerType::Transpose: + { + auto transposeDesc = PolymorphicDowncast<const TransposeDescriptor*>(&descriptor); + return ConvertTransposeToTosaOperator(layer, inputs, outputs, transposeDesc); + } default: { return CreateEmptyTosaSerializationBasicBlock(); diff --git a/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp b/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp deleted file mode 100644 index 5eb7441531..0000000000 --- a/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "TosaOperatorUtils.hpp" - -#include <Layer.hpp> - -#include <tosa_serialization_handler.h> - -using namespace armnn; -using namespace tosa; - -TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer, - const std::vector<const TensorInfo*>& inputs, - const std::vector<const TensorInfo*>& outputs); - diff --git a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt index 2443dc0585..2ec052cd43 100644 --- a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt +++ b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt @@ -4,8 +4,6 @@ # list(APPEND armnnTosaBackendOperators_sources - AdditionOperator.hpp - AdditionOperator.cpp AvgPool2DIgnoreValueOperator.hpp AvgPool2DIgnoreValueOperator.cpp ConcatOperator.hpp @@ -14,6 +12,8 @@ list(APPEND armnnTosaBackendOperators_sources ConstantOperator.cpp Conv2dOperator.hpp Conv2dOperator.cpp + ElementwiseBinaryOperator.hpp + ElementwiseBinaryOperator.cpp Pooling2DOperator.hpp Pooling2DOperator.cpp ReshapeOperator.hpp @@ -23,6 +23,8 @@ list(APPEND armnnTosaBackendOperators_sources TosaOperatorUtils.hpp TransposeConv2dOperator.hpp TransposeConv2dOperator.cpp + TransposeOperator.hpp + TransposeOperator.cpp ) add_library(armnnTosaBackendOperators OBJECT ${armnnTosaBackendOperators_sources}) diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp new file mode 100644 index 0000000000..9909e66a7d --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp @@ -0,0 +1,103 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ElementwiseBinaryOperator.hpp" + +TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer, + const LayerType type, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs) +{ + std::string input0Name = std::string("input0_"); + std::string input1Name = std::string("input1_"); + std::string outputName = std::string("output0_"); + std::string blockName; + + // If a layer is present then the block will be used for execution, so input and output names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. + if(layer != nullptr) + { + // Get the layers connected to the input slots and determine unique tensor names. + Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); + input0Name = GenerateUniqueName(connectedLayer0, 0); + + Layer& connectedLayer1 = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer(); + input1Name = GenerateUniqueName(connectedLayer1, 1); + + // Determine unique output tensor name. 
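+ // (GenerateUniqueName/GenerateUniqueOutputName come from TosaOperatorUtils.hpp; the names are derived + // from the layers involved, so neighbouring blocks reference the same tensor instead of duplicating it.)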
+        outputName = GenerateUniqueOutputName(*layer, 0);
+    }
+
+    TosaSerializationOperator* op = nullptr;
+    switch(type)
+    {
+        case LayerType::Addition:
+        {
+            op = new TosaSerializationOperator(Op_ADD,
+                                               Attribute_NONE,
+                                               nullptr,
+                                               {input0Name, input1Name},
+                                               {outputName});
+            blockName = std::string("Op_ADD_block_") + GetUniqueTosaMappingID();
+            break;
+        }
+        case LayerType::Multiplication:
+        {
+            int32_t shift = 0;
+            TosaMulAttribute mulAttribute(shift);
+            op = new TosaSerializationOperator(Op_MUL,
+                                               Attribute_MulAttribute,
+                                               &mulAttribute,
+                                               {input0Name, input1Name},
+                                               {outputName});
+            blockName = std::string("Op_MUL_block_") + GetUniqueTosaMappingID();
+            break;
+        }
+        case LayerType::Subtraction:
+        {
+            op = new TosaSerializationOperator(Op_SUB,
+                                               Attribute_NONE,
+                                               nullptr,
+                                               {input0Name, input1Name},
+                                               {outputName});
+            blockName = std::string("Op_SUB_block_") + GetUniqueTosaMappingID();
+            break;
+        }
+        default:
+            throw armnn::Exception("ConvertElementwiseBinaryToTosaOperator: Unsupported layer type.");
+    }
+    ARMNN_ASSERT(op != nullptr);
+
+    std::vector<TosaSerializationTensor*> tensors;
+    // Only add input tensors if the connected layer is an input layer,
+    // as intermediate or constant tensors will be created separately.
+    // There also can't be duplicate tensors.
+    if(input0Name.find("input0_") != std::string::npos)
+    {
+        std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
+        DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
+        tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {}));
+    }
+    if(input1Name.find("input1_") != std::string::npos)
+    {
+        std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
+        DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
+        tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {}));
+    }
+
+    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
+    DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
+
+    tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+
+    // operatorInputNames/operatorOutputNames end up being the same as
+    // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings.
+    return new TosaSerializationBasicBlock(blockName, // name
+                                           {op}, // operators
+                                           tensors, // tensors
+                                           {input0Name, input1Name}, // inputs
+                                           {outputName}); // outputs
+}
+
diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp
new file mode 100644
index 0000000000..86031c6e06
--- /dev/null
+++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TosaOperatorUtils.hpp"
+
+#include <Layer.hpp>
+
+#include <tosa_serialization_handler.h>
+
+using namespace armnn;
+using namespace tosa;
+
+TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer,
+                                                                    const LayerType type,
+                                                                    const std::vector<const TensorInfo*>& inputs,
+                                                                    const std::vector<const TensorInfo*>& outputs);
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp index 052c54c3af..3f27371295 100644 --- a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp +++ b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp @@ -5,12 +5,13 @@ #pragma once -#include "AdditionOperator.hpp" +#include "AvgPool2DIgnoreValueOperator.hpp" #include "ConcatOperator.hpp" #include "ConstantOperator.hpp" #include "Conv2dOperator.hpp" -#include "AvgPool2DIgnoreValueOperator.hpp" +#include "ElementwiseBinaryOperator.hpp" #include "Pooling2DOperator.hpp" #include "ReshapeOperator.hpp" #include "SliceOperator.hpp" -#include "TransposeConv2dOperator.hpp"
\ No newline at end of file +#include "TransposeConv2dOperator.hpp" +#include "TransposeOperator.hpp" diff --git a/src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp index 7014886d92..56178e428b 100644 --- a/src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp +++ b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp @@ -3,36 +3,37 @@ // SPDX-License-Identifier: MIT // -#include "AdditionOperator.hpp" +#include "TransposeOperator.hpp" -TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer, - const std::vector<const TensorInfo*>& inputs, - const std::vector<const TensorInfo*>& outputs) +TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const TransposeDescriptor* transposeDescriptor) { std::string input0Name = std::string("input0_"); - std::string input1Name = std::string("input1_"); std::string outputName = std::string("output0_"); - std::string blockName = std::string("Op_ADD_block_") + GetUniqueTosaMappingID(); + std::string blockName = std::string("Op_TRANSPOSE_block_") + GetUniqueTosaMappingID(); // If a layer is present then the block will be used for execution, so input and output names need to be determined // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. if(layer != nullptr) { - // Get the layers connected to the input slots and determine unique tensors names. + // Get the layers connected to the input slot and determine unique tensor name. Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); input0Name = GenerateUniqueName(connectedLayer0, 0); - Layer& connectedLayer1 = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer(); - input1Name = GenerateUniqueName(connectedLayer1, 1); - // Determine unique output tensor name. outputName = GenerateUniqueOutputName(*layer, 0); } - auto* op = new TosaSerializationOperator(Op_ADD, - Attribute_NONE, - nullptr, - {input0Name, input1Name}, + std::vector<int32_t> mappings(transposeDescriptor->m_DimMappings.begin(), + transposeDescriptor->m_DimMappings.end()); + TosaTransposeAttribute attribute(mappings); + + auto* op = new TosaSerializationOperator(Op_TRANSPOSE, + Attribute_TransposeAttribute, + &attribute, + {input0Name}, {outputName}); @@ -49,14 +50,6 @@ TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer, tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {})); } - if(input1Name.find("input1_") != std::string::npos) - { - std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape()); - DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType()); - - tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {})); - } - std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); @@ -67,6 +60,6 @@ TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer, return new TosaSerializationBasicBlock(blockName, // name {op}, // operators tensors, // tensors - {input0Name, input1Name}, // inputs + {input0Name}, // inputs {outputName}); // outputs }
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp b/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp new file mode 100644 index 0000000000..3d1e2acd14 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +#include <Layer.hpp> + +#include <tosa_serialization_handler.h> + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const TransposeDescriptor* transposeDescriptor); diff --git a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp index b3ab14a774..4cc37918e5 100644 --- a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp +++ b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp @@ -253,6 +253,54 @@ TEST_CASE("GetTosaMappingFromLayer_Conv2dLayer") basicBlock, inputShape, outputShape, Op_CONV2D, Attribute_ConvAttribute, descriptor, LayerType::Convolution2d); } +TEST_CASE("GetTosaMapping_MultiplicationLayer") +{ + + const TensorInfo input0Info ({ 1, 2, 4, 2 }, DataType::Float32); + const TensorInfo input1Info ({ 1, 2, 4, 2 }, DataType::Float32); + const TensorInfo outputInfo ({ 1, 2, 4, 2 }, DataType::Float32); + + std::vector<std::vector<int32_t>> inputShape = {{ 1, 2, 4, 2 }, { 1, 2, 4, 2 }}; + std::vector<std::vector<int32_t>> outputShape = {{ 1, 2, 4, 2 }}; + + TosaSerializationBasicBlock* basicBlock = + GetTosaMapping(nullptr, LayerType::Multiplication, {&input0Info, &input1Info}, {&outputInfo}, BaseDescriptor()); + AssertTosaOneToOneMappingBasicBlock( basicBlock, inputShape, outputShape, + tosa::Op_MUL, tosa::Attribute_MulAttribute, BaseDescriptor(), LayerType::Multiplication); +} + +TEST_CASE("GetTosaMappingFromLayer_MultiplicationLayer") +{ + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+    IConnectableLayer* mul    = net->AddMultiplicationLayer("multiplication");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input0->GetOutputSlot(0).Connect(mul->GetInputSlot(0));
+    input1->GetOutputSlot(0).Connect(mul->GetInputSlot(1));
+    mul->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo info = TensorInfo({ 2, 2 }, DataType::Float32, 0.0f, 0, true);
+
+    input0->GetOutputSlot(0).SetTensorInfo(info);
+    input1->GetOutputSlot(0).SetTensorInfo(info);
+    mul->GetOutputSlot(0).SetTensorInfo(info);
+
+    std::vector<std::vector<int32_t>> inputShape = {{ 2, 2 }, { 2, 2 }};
+    std::vector<std::vector<int32_t>> outputShape = {{ 2, 2 }};
+
+    TosaSerializationBasicBlock* basicBlock =
+        GetTosaMappingFromLayer(PolymorphicDowncast<Layer*>(mul));
+    AssertTosaOneToOneMappingBasicBlock(basicBlock, inputShape, outputShape,
+        tosa::Op_MUL, Attribute_MulAttribute, BaseDescriptor(), LayerType::Multiplication);
+}
+
 TEST_CASE("GetTosaMapping_AvgPool2DLayer")
 {
     Pooling2dDescriptor descriptor;
@@ -616,6 +664,64 @@ TEST_CASE("GetTosaMappingFromLayer_TransposeConv2dLayer")
                                         LayerType::TransposeConvolution2d);
 }
 
+TEST_CASE("GetTosaMapping_TransposeLayer")
+{
+    TensorInfo inputInfo = TensorInfo({ 1, 1, 5, 3 }, DataType::Float32, 0.0f, 0, true);
+    TensorInfo outputInfo = TensorInfo({ 1, 5, 1, 3 }, DataType::Float32, 0.0f, 0, true);
+
+    std::vector<std::vector<int32_t>> inputShape = {{ 1, 1, 5, 3 }};
+    std::vector<std::vector<int32_t>> outputShape = {{ 1, 5, 1, 3 }};
+
+    TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1, 3 });
+
+    TosaSerializationBasicBlock* basicBlock =
+        GetTosaMapping(nullptr, LayerType::Transpose, {&inputInfo}, {&outputInfo}, transposeDescriptor);
+    AssertTosaOneToOneMappingBasicBlock(basicBlock,
+                                        inputShape,
+                                        outputShape,
+                                        Op_TRANSPOSE,
+                                        Attribute_TransposeAttribute,
+                                        transposeDescriptor,
+                                        LayerType::Transpose);
+}
+
+TEST_CASE("GetTosaMappingFromLayer_TransposeLayer")
+{
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1, 3 });
+
+    IConnectableLayer* input = net->AddInputLayer(0, "input0");
+    IConnectableLayer* transpose = net->AddTransposeLayer(transposeDescriptor, "transpose");
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+    input->GetOutputSlot(0).Connect(transpose->GetInputSlot(0));
+    transpose->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo inputInfo = TensorInfo({ 1, 1, 5, 3 }, DataType::Float32, 0.0f, 0, true);
+    TensorInfo outputInfo = TensorInfo({ 1, 5, 1, 3 }, DataType::Float32, 0.0f, 0, true);
+
+    input->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    transpose->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    std::vector<std::vector<int32_t>> inputShape = {{ 1, 1, 5, 3 }};
+    std::vector<std::vector<int32_t>> outputShape = {{ 1, 5, 1, 3 }};
+
+    TosaSerializationBasicBlock* basicBlock =
+        GetTosaMappingFromLayer(PolymorphicDowncast<Layer*>(transpose));
+    AssertTosaOneToOneMappingBasicBlock(basicBlock,
+                                        inputShape,
+                                        outputShape,
+                                        Op_TRANSPOSE,
+                                        Attribute_TransposeAttribute,
+                                        transposeDescriptor,
+                                        LayerType::Transpose);
+}
+
 TEST_CASE("GetTosaMapping_Unimplemented")
 {
     TosaSerializationBasicBlock* basicBlock =
diff --git a/src/backends/tosaCommon/test/TosaTestUtils.hpp b/src/backends/tosaCommon/test/TosaTestUtils.hpp
index 140cb83983..e24055371f 100644
--- a/src/backends/tosaCommon/test/TosaTestUtils.hpp
+++ b/src/backends/tosaCommon/test/TosaTestUtils.hpp
@@ -158,6 +158,14 @@ inline void VerifyTosaAttribute(const BaseDescriptor& descriptor,
             CHECK(stride == transposeConvAttribute.stride());
             break;
         }
+        case LayerType::Transpose:
+        {
+            auto transposeDesc = PolymorphicDowncast<const TransposeDescriptor*>(&descriptor);
+            std::vector<int> outPerm(transposeDesc->m_DimMappings.begin(), transposeDesc->m_DimMappings.end());
+            TosaTransposeAttribute transposeAttribute(attribute);
+            CHECK(outPerm == transposeAttribute.perms());
+            break;
+        }
         default:
             break;
     }
diff --git a/src/backends/tosaReference/TosaRefLayerSupport.cpp b/src/backends/tosaReference/TosaRefLayerSupport.cpp
index 0d0d07a783..6113b5861a 100644
--- a/src/backends/tosaReference/TosaRefLayerSupport.cpp
+++ b/src/backends/tosaReference/TosaRefLayerSupport.cpp
@@ -38,6 +38,8 @@ bool TosaRefLayerSupport::IsLayerSupported(const LayerType& type,
         case LayerType::Output:
             return true;
         case LayerType::Addition:
+        case LayerType::Multiplication:
+        case LayerType::Subtraction:
             // Setup inputs and outputs
             inputInfos.push_back(&infos[0]);
             inputInfos.push_back(&infos[1]);
@@ -69,7 +71,7 @@ bool TosaRefLayerSupport::IsLayerSupported(const LayerType& type,
         case LayerType::Pooling2d:
         case LayerType::Reshape:
         case LayerType::Slice:
-            // Setup inputs and outputs
+        case LayerType::Transpose:
             inputInfos.push_back(&infos[0]);
             outputInfos.push_back(&infos[1]);
             break;
diff --git a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
index a377293fbf..e19462e986 100644
--- a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
@@ -8,10 +8,13 @@
 #include "backendsCommon/test/AdditionEndToEndTestImpl.hpp"
 #include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp"
 #include "backendsCommon/test/ConcatEndToEndTestImpl.hpp"
+#include "backendsCommon/test/MultiplicationEndToEndTestImpl.hpp"
 #include "backendsCommon/test/Pooling2dEndToEndTestImpl.hpp"
 #include "backendsCommon/test/ReshapeEndToEndTestImpl.hpp"
 #include "backendsCommon/test/SliceEndToEndTestImpl.hpp"
+#include "backendsCommon/test/SubtractionEndToEndTestImpl.hpp"
 #include "backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp"
+#include "backendsCommon/test/TransposeEndToEndTestImpl.hpp"
 
 #include <doctest/doctest.h>
 
@@ -150,6 +153,35 @@ TEST_CASE("TosaRefSliceEndtoEndTestFloat16")
 {
     SliceEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
 }
 
+TEST_CASE("TosaRefSubtractionEndtoEndTestFloat32")
+{
+    SubtractionEndToEnd<DataType::Float32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefSubtractionEndtoEndTestInt32")
+{
+    SubtractionEndToEnd<DataType::Signed32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefSubtractionEndtoEndTestFloat16")
+{
+    SubtractionEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestFloat32")
+{
+    MultiplicationEndToEnd<DataType::Float32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestInt32")
+{
+    MultiplicationEndToEnd<DataType::Signed32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestFloat16")
+{
+    MultiplicationEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
+}
+
 // TransposeConvolution2d
 TEST_CASE("TosaRefTransposeConvolution2dEndToEndFloatNhwcTest")
 {
     TransposeConvolution2dEndToEnd<armnn::DataType::Float32>(
@@ -164,4 +196,10 @@ TEST_CASE("TosaRefSimpleTransposeConvolution2dEndToEndFloatNhwcTest")
                                                              tosaDefaultBackends, armnn::DataLayout::NHWC);
 }
 
+// Transpose
+TEST_CASE("TosaRefTransposeEndtoEndTestFloat32")
+{
+    TransposeEndToEnd<armnn::DataType::Float32>(tosaDefaultBackends);
+}
+
 }
\ No newline at end of file
diff --git a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
index 051965f541..66dfbe8dff 100644
--- a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
@@ -190,6 +190,50 @@ TEST_CASE("IsLayerSupportedTosaReferenceConv2dUnsupported")
     CHECK(!supported);
 }
 
+TEST_CASE("IsLayerSupportedTosaReferenceMultiplication")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {1,1,3,4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Float32);
+    TensorInfo in1(shape1, armnn::DataType::Float32);
+    TensorInfo out(outShape, armnn::DataType::Float32);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Multiplication,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceMultiplicationUnsupported")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {1,2,3,4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Signed64);
+    TensorInfo in1(shape1, armnn::DataType::Signed64);
+    TensorInfo out(outShape, armnn::DataType::Signed64);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Multiplication,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(!supported);
+}
+
 TEST_CASE("IsLayerSupportedTosaReferenceMaxPooling2d")
 {
     TensorShape inShape = {1,1,3,4};
@@ -376,6 +420,50 @@ TEST_CASE("IsLayerSupportedTosaReferenceSliceUnsupported")
     CHECK(!supported);
 }
 
+TEST_CASE("IsLayerSupportedTosaReferenceSubtraction")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {1,1,3,4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Float32);
+    TensorInfo in1(shape1, armnn::DataType::Float32);
+    TensorInfo out(outShape, armnn::DataType::Float32);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Subtraction,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceSubtractionUnsupported")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Signed64);
+    TensorInfo in1(shape1, armnn::DataType::Signed64);
+    TensorInfo out(outShape, armnn::DataType::Signed64);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Subtraction,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(!supported);
+}
+
 TEST_CASE("IsLayerSupportedTosaReferenceTransposeConv2d")
 {
     TensorInfo inputInfo ({ 1, 3, 3, 1 }, DataType::Float32);
@@ -421,4 +509,46 @@ TEST_CASE("IsLayerSupportedTosaReferenceTransposeConv2dUnsupported")
     CHECK(!supported);
 }
 
+TEST_CASE("IsLayerSupportedTosaReferenceTranspose")
+{
+    TensorShape inShape = { 1, 1, 5, 3 };
+    TensorShape outShape = { 1, 5, 1, 3 };
+    TensorInfo in(inShape, DataType::Float32);
+    TensorInfo out(outShape, DataType::Float32);
+
+    TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1, 3 });
+
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(LayerType::Transpose,
+                                                     {in, out},
+                                                     transposeDescriptor,
+                                                     EmptyOptional(),
+                                                     EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceTransposeUnsupported")
+{
+    TensorShape inShape = { 1, 1, 5, 3 };
+    TensorShape outShape = { 1, 5, 1, 3 };
+    TensorInfo in(inShape, DataType::Signed64);
+    TensorInfo out(outShape, DataType::Signed64);
+
+    TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1, 3 });
+
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(LayerType::Transpose,
+                                                     {in, out},
+                                                     transposeDescriptor,
+                                                     EmptyOptional(),
+                                                     EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(!supported);
+}
+
 }
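
[Editor's note] The NeonLayerSupport hunk at the top of this section implements an FP32 fallback: when a layer-support query fails specifically with the "This CPU architecture does not support F16 data type, you need v8.2 or above" reason, the query is retried with every TensorInfo overridden to Float32, so FP16 networks can still be scheduled on pre-Armv8.2 CPUs. Below is a minimal self-contained C++ sketch of that retry pattern; the types and the CheckSupport/CheckSupportWithFp32Fallback helpers are hypothetical stand-ins for armnn::TensorInfo and IsLayerTypeSupported, not real ArmNN API.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins for armnn::DataType / armnn::TensorInfo, for illustration only.
enum class DataType { Float16, Float32 };
struct TensorInfo { DataType type; };

// Pretend backend query: rejects FP16 the way a pre-v8.2 Neon backend would.
bool CheckSupport(const std::vector<TensorInfo>& infos, std::string& reason)
{
    bool anyFp16 = std::any_of(infos.begin(), infos.end(),
                               [](const TensorInfo& i) { return i.type == DataType::Float16; });
    if (anyFp16)
    {
        reason = "This CPU architecture does not support F16 data type, you need v8.2 or above";
        return false;
    }
    return true;
}

// The retry pattern from the diff: only when the failure is the specific FP16
// message, override every tensor to Float32 and ask again, discarding the
// second reason string (the diff's tmpString).
bool CheckSupportWithFp32Fallback(const std::vector<TensorInfo>& infos, std::string& reason)
{
    bool isSupported = CheckSupport(infos, reason);
    const std::string checkStr =
        "This CPU architecture does not support F16 data type, you need v8.2 or above";
    if (!isSupported && reason.find(checkStr) != std::string::npos)
    {
        std::vector<TensorInfo> newInfos;
        for (const TensorInfo& info : infos)
        {
            newInfos.push_back(TensorInfo{ DataType::Float32 }); // OverrideDataType equivalent
        }
        std::string tmpString;
        return CheckSupport(newInfos, tmpString);
    }
    return isSupported;
}

int main()
{
    std::string reason;
    std::vector<TensorInfo> fp16Net = { { DataType::Float16 }, { DataType::Float16 } };
    std::cout << std::boolalpha << CheckSupportWithFp32Fallback(fp16Net, reason) << '\n'; // prints: true
    return 0;
}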
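[Editor's note] In the new ConvertElementwiseBinaryToTosaOperator, ADD and SUB are emitted with Attribute_NONE while MUL alone carries a TosaMulAttribute holding a shift of 0. TOSA MUL defines an extra scaling step on the integer product, which is why the attribute exists at all; the following is a hypothetical model of that behaviour (editorial illustration, not TOSA reference code, and ignoring the spec's rounding detail) showing why shift = 0 yields the plain elementwise product that the float path needs.

#include <cstdint>
#include <iostream>

// Hypothetical model: the 64-bit product of two int32 operands is scaled down
// by 'shift' before narrowing. With shift = 0, as passed in the diff, MUL
// behaves like ADD/SUB-style plain elementwise arithmetic.
int32_t TosaStyleMul(int32_t a, int32_t b, int32_t shift)
{
    int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    return static_cast<int32_t>(product >> shift);
}

int main()
{
    std::cout << TosaStyleMul(6, 7, 0) << '\n'; // prints: 42
    return 0;
}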
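[Editor's note] The transpose tests above pair dim mappings { 0, 2, 1, 3 } with input shape { 1, 1, 5, 3 } and output shape { 1, 5, 1, 3 }. Under TOSA's perms convention, output dimension i takes input dimension perms[i]; the sketch below (editorial, not ArmNN code) checks that shape arithmetic. Since { 0, 2, 1, 3 } simply swaps dimensions 1 and 2, the permutation is its own inverse, so the same expected shape holds under either mapping direction.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// outShape[i] = inShape[perms[i]] -- the convention used by the TOSA TRANSPOSE
// perms attribute verified in TosaTestUtils.hpp above.
std::vector<int32_t> PermuteShape(const std::vector<int32_t>& inShape,
                                  const std::vector<int32_t>& perms)
{
    std::vector<int32_t> outShape(inShape.size());
    for (std::size_t i = 0; i < inShape.size(); ++i)
    {
        outShape[i] = inShape[perms[i]];
    }
    return outShape;
}

int main()
{
    // Shapes and mappings taken from GetTosaMapping_TransposeLayer above.
    std::vector<int32_t> in    = { 1, 1, 5, 3 };
    std::vector<int32_t> perms = { 0, 2, 1, 3 };
    assert(PermuteShape(in, perms) == (std::vector<int32_t>{ 1, 5, 1, 3 }));
    return 0;
}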