From 4cc341cf8b5a6e6bb0543504cbbfde6fa11a2cdb Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Fri, 7 Jul 2023 15:43:06 +0100
Subject: IVGCVSW-7830 Add backend optimizations to remove Reshapes where
 possible

* Added optimization to remove Reshapes for the Neon and Ref backends by using overridden TensorInfos
* Added the ability to delete Subgraphs during optimization
* Fixed naming errors in NeonEndToEndTests and ClEndToEndTests
* Added LayerNameAndTypeCheck for testing
* Fixed an error where layers were not marked as altered when removed in ClBackend

Signed-off-by: Mike Kelly
Change-Id: I1ac25cd4ec9821470d961831ae2c8d24882276cc
---
 include/armnn/INetwork.hpp                         |  12 +
 include/armnn/Version.hpp                          |   4 +-
 include/armnn/backends/ITensorHandle.hpp           |  11 +-
 include/armnn/backends/OptimizationViews.hpp       |   7 +
 src/armnn/Graph.cpp                                |  10 +
 src/armnn/Layer.cpp                                |  15 +-
 src/armnn/Layer.hpp                                |   8 +-
 src/armnn/LoadedNetwork.cpp                        |  28 +-
 src/armnn/Network.cpp                              |  22 ++
 src/armnn/layers/ConcatLayer.cpp                   |   3 +-
 src/armnn/layers/ReverseV2Layer.cpp                |   2 +-
 src/armnn/layers/SplitterLayer.cpp                 |   5 +-
 src/armnnDeserializer/Deserializer.cpp             |   7 +
 src/armnnSerializer/ArmnnSchema.fbs                |   2 +
 src/armnnSerializer/Serializer.cpp                 |   6 +-
 src/armnnSerializer/test/SerializerTestUtils.cpp   |   2 +-
 src/armnnSerializer/test/SerializerTests.cpp       |  30 ++
 src/armnnTestUtils/TestUtils.hpp                   |  20 +-
 src/backends/backendsCommon/OptimizationViews.cpp  |   8 +-
 src/backends/backendsCommon/SubgraphUtils.hpp      | 186 +++++++++-
 src/backends/backendsCommon/WorkloadFactory.cpp    |   6 +-
 src/backends/backendsCommon/test/CMakeLists.txt    |   1 +
 .../backendsCommon/test/SubgraphUtilsTest.hpp      | 399 +++++++++++++++++++++
 src/backends/cl/CMakeLists.txt                     |   1 +
 src/backends/cl/ClBackend.cpp                      |   6 +-
 src/backends/cl/ClTensorHandle.cpp                 |  82 +++++
 src/backends/cl/ClTensorHandle.hpp                 | 184 +++++++++-
 src/backends/cl/backend.mk                         |   1 +
 src/backends/cl/test/ClEndToEndTests.cpp           |   7 +-
 src/backends/neon/CMakeLists.txt                   |   1 +
 src/backends/neon/NeonBackend.cpp                  |  32 +-
 src/backends/neon/NeonTensorHandle.cpp             |  47 +++
 src/backends/neon/NeonTensorHandle.hpp             | 168 ++++++++-
 src/backends/neon/backend.mk                       |   1 +
 src/backends/neon/test/NeonEndToEndTests.cpp       |  22 +-
 src/backends/reference/RefBackend.cpp              |  13 +-
 src/backends/reference/RefTensorHandle.cpp         |  69 +++-
 src/backends/reference/RefTensorHandle.hpp         |  88 ++++-
 .../reference/test/RefCreateWorkloadTests.cpp      |  48 +++
 src/backends/reference/test/RefEndToEndTests.cpp   |  17 +
 tests/ExecuteNetwork/ArmNNExecutor.cpp             |   4 +-
 41 files changed, 1524 insertions(+), 61 deletions(-)
 create mode 100644 src/backends/backendsCommon/test/SubgraphUtilsTest.hpp
 create mode 100644 src/backends/cl/ClTensorHandle.cpp
 create mode 100644 src/backends/neon/NeonTensorHandle.cpp

diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 9b8c3b0b7b..e311fa8840 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -31,6 +31,18 @@ public:
     virtual IConnectableLayer& GetOwningIConnectableLayer() = 0;
     virtual unsigned int GetSlotIndex() const = 0;
 
+    /// Sets the TensorInfo for this InputSlot. This can be used to override the TensorInfo and if set will be returned
+    /// instead of the TensorInfo for the Connected OutputSlot.
+    virtual void SetTensorInfo(const TensorInfo tensorInfo) = 0;
+
+    /// Gets the TensorInfo for this InputSlot. If the InputSlot's TensorInfo has not been set then this will get the
+    /// TensorInfo from the Connected OutputSlot.
+    virtual const TensorInfo& GetTensorInfo() const = 0;
+
+    /// Returns true if this InputSlot either has an overridden TensorInfo for this InputSlot that was set through a
+    /// call to SetTensorInfo() or is Connected to an OutputSlot that has its TensorInfo set.
+    virtual bool IsTensorInfoSet() const = 0;
+
+    /// Returns true if this InputSlot has an overridden TensorInfo that was set through a call to SetTensorInfo().
+    virtual bool IsTensorInfoOverridden() const = 0;
+
 protected:
     /// Not user deletable.
     ~IInputSlot() {}
diff --git a/include/armnn/Version.hpp b/include/armnn/Version.hpp
index 82d546a56a..3c4abdef54 100644
--- a/include/armnn/Version.hpp
+++ b/include/armnn/Version.hpp
@@ -10,8 +10,8 @@
 #define STRINGIFY_MACRO(s) #s
 
 // ArmNN version components
-#define ARMNN_MAJOR_VERSION 32
-#define ARMNN_MINOR_VERSION 1
+#define ARMNN_MAJOR_VERSION 33
+#define ARMNN_MINOR_VERSION 0
 #define ARMNN_PATCH_VERSION 0
 
 /// ARMNN_VERSION: "X.Y.Z"
diff --git a/include/armnn/backends/ITensorHandle.hpp b/include/armnn/backends/ITensorHandle.hpp
index 78404d0e39..5d8f36ff12 100644
--- a/include/armnn/backends/ITensorHandle.hpp
+++ b/include/armnn/backends/ITensorHandle.hpp
@@ -1,11 +1,12 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
 
 #include <armnn/MemorySources.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <memory>
 
 namespace armnn
 {
@@ -91,6 +92,14 @@ public:
 
     /// Unimport externally allocated memory
     virtual void Unimport() {};
+
+    /// Returns a decorated version of this TensorHandle allowing us to override the TensorInfo for it
+    /// \param tensorInfo the overridden TensorInfo.
+    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo& tensorInfo)
+    {
+        IgnoreUnused(tensorInfo);
+        return nullptr;
+    }
 };
 
 }
diff --git a/include/armnn/backends/OptimizationViews.hpp b/include/armnn/backends/OptimizationViews.hpp
index 6f27345ca4..ec1dc55156 100644
--- a/include/armnn/backends/OptimizationViews.hpp
+++ b/include/armnn/backends/OptimizationViews.hpp
@@ -50,9 +50,15 @@ public:
         m_UntouchedSubgraphs.emplace_back(subgraph);
     }
 
+    void AddDeletedSubgraph(SubgraphView&& subgraph)
+    {
+        m_DeletedSubgraphs.emplace_back(subgraph);
+    }
+
     const Substitutions& GetSubstitutions() const { return m_SuccesfulOptimizations; }
     const Subgraphs& GetFailedSubgraphs() const { return m_FailedOptimizations; }
     const Subgraphs& GetUntouchedSubgraphs() const { return m_UntouchedSubgraphs; }
+    const Subgraphs& GetDeletedSubgraphs() const { return m_DeletedSubgraphs; }
 
     Substitutions& GetSubstitutions() { return m_SuccesfulOptimizations; }
     Subgraphs& GetFailedSubgraphs() { return m_FailedOptimizations; }
@@ -67,6 +73,7 @@ private:
     Substitutions m_SuccesfulOptimizations;  ///< Proposed substitutions from successful optimizations
     Subgraphs m_FailedOptimizations;         ///< Subgraphs from the original subgraph which cannot be supported
     Subgraphs m_UntouchedSubgraphs;          ///< Subgraphs from the original subgraph which remain unmodified
+    Subgraphs m_DeletedSubgraphs;            ///< Subgraphs from the original subgraph which have been deleted
 
     /// INetworkPtr object used only as a container for any layer generated by the optimization process
     /// Also, can use to AddPrecompiledLayer to the SubstitutionPair
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index fee1da4343..cf6f20f82b 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -508,12 +508,22 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const Subgr
         if (subgraphInputSlot->GetConnection())
{ IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection(); + InputSlot* inputSlot = PolymorphicDowncast(subgraphInputSlot); + bool isOverridden = inputSlot->IsTensorInfoOverridden(); + ARMNN_ASSERT(connectedOutputSlot); connectedOutputSlot->Disconnect(*subgraphInputSlot); IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx); ARMNN_ASSERT(substituteInputSlot); connectedOutputSlot->Connect(*substituteInputSlot); + + if (isOverridden) + { + TensorInfo overridden = inputSlot->GetTensorInfo(); + InputSlot* newInputSlot = PolymorphicDowncast(substituteInputSlot); + newInputSlot->SetTensorInfo(overridden); + } } } diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index 8d4811ae89..d2f8f2c982 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -259,7 +259,20 @@ void Layer::CollectWorkloadInputs(WorkloadDataCollector& dataCollector) const // The graph must be well-formed at this point. ARMNN_ASSERT(inputSlot.GetConnection()); const OutputHandler& outputHandler = inputSlot.GetConnectedOutputSlot()->GetOutputHandler(); - dataCollector.Push(outputHandler.GetData(), outputHandler.GetTensorInfo()); + + if (inputSlot.IsTensorInfoOverridden() && outputHandler.GetData()) + { + auto handler = outputHandler.GetData()->DecorateTensorHandle(inputSlot.GetTensorInfo()); + + if (handler) + { + // Add overridden TensorHandle + dataCollector.Push(handler.get(), inputSlot.GetTensorInfo()); + continue; + } + } + // Add default TensorHandle + dataCollector.Push(outputHandler.GetData(), inputSlot.GetTensorInfo()); } } diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index 5e097f0fad..4f69e78b62 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -80,15 +80,15 @@ public: /// Sets the TensorInfo for this InputSlot. This can be used to override the TensorInfo and if set will be returned /// instead of the TensorInfo for the Connected OutputSlot. - void SetTensorInfo(const TensorInfo tensorInfo); + void SetTensorInfo(const TensorInfo tensorInfo) override; /// Gets the TensorInfo for this InputSlot. If the InputSlot's TensorInfo has not been set then this will get the /// TensorInfo from the Connected TensorInfo. - const TensorInfo& GetTensorInfo() const; + const TensorInfo& GetTensorInfo() const override; /// Returns true if this InputSlot either has an overridden TensorInfo for this InputSlot that was set through a /// call to SetTensorInfo() or is Connected to an OutputSlot that has its TensorInfo set. - bool IsTensorInfoSet() const; + bool IsTensorInfoSet() const override; /// Returns true if this InputSlot has an overridden TensorInfo that was set through a call to SetTensorInfo(). 
- bool IsTensorInfoOverridden() const; + bool IsTensorInfoOverridden() const override; private: Layer& m_OwningLayer; diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 3f4aa34a5b..3d84054b69 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -955,10 +955,10 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, syncDesc.m_Inputs.push_back(inputTensorHandle); WorkloadInfo info; info.m_InputTensorInfos.push_back( - outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo()); + outputLayer->GetInputSlot(0).GetTensorInfo()); auto syncWorkload = std::make_unique(syncDesc, info); ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created"); - m_OutputQueue.push_back(move(syncWorkload)); + m_OutputQueue.push_back(std::move(syncWorkload)); importedOutputIdIndex++; } else @@ -1089,7 +1089,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens timelineUtils->Commit(); } - m_InputQueue.push_back(move(inputWorkload)); + m_InputQueue.push_back(std::move(inputWorkload)); } } @@ -1149,7 +1149,7 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten info.m_InputTensorInfos.push_back(inputTensorInfo); auto syncWorkload = std::make_unique(syncDesc, info); ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created"); - m_OutputQueue.push_back(move(syncWorkload)); + m_OutputQueue.push_back(std::move(syncWorkload)); } else { @@ -1177,7 +1177,7 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten timelineUtils->Commit(); } - m_OutputQueue.push_back(move(outputWorkload)); + m_OutputQueue.push_back(std::move(outputWorkload)); } } @@ -1650,7 +1650,7 @@ std::vector LoadedNetwork::ImportOutputs(const OutputTensors& const InputSlot& inputSlot = layer->GetInputSlots()[0]; ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId(); - const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo(); + const TensorInfo& tensorInfo = inputSlot.GetTensorInfo(); ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId); ARMNN_ASSERT(handleFactory); @@ -2093,6 +2093,14 @@ std::unique_ptr LoadedNetwork::CreateWorkingMemHandle(Network if (found != m_ConstantTensorHandles.end()) { ITensorHandle* tensorHandle = found->second; + if (slot.IsTensorInfoOverridden()) + { + ITensorHandle* decorated = tensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get(); + if (decorated) + { + tensorHandle = decorated; + } + } workingMemDescriptor.m_Inputs.push_back(tensorHandle); // Odd case where a constant layer is connected to an output layer @@ -2113,6 +2121,14 @@ std::unique_ptr LoadedNetwork::CreateWorkingMemHandle(Network HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot); ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle; + if (slot.IsTensorInfoOverridden()) + { + ITensorHandle* decorated = inputTensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get(); + if (decorated) + { + inputTensorHandle = decorated; + } + } workingMemDescriptor.m_Inputs.push_back(inputTensorHandle); // Store the LayerBindingId of the OutputLayer diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 2abaf44587..ae5bde17ca 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -1310,6 +1310,28 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr, }); } + // Remove deleted sub-graphs + for (auto& deletedSubgraph : 
optimizationViews.GetDeletedSubgraphs()) + { + for (auto& l : deletedSubgraph.GetIConnectableLayers()) + { + Layer* deletedLayer = PolymorphicDowncast(l); + for (unsigned int in = deletedLayer->GetNumInputSlots(); in > 0; --in) + { + auto inputSlot = deletedLayer->GetInputSlot(in -1); + OutputSlot* parentOut = inputSlot.GetConnectedOutputSlot(); + parentOut->Disconnect(inputSlot); + for (unsigned int out = deletedLayer->GetOutputSlot(in -1).GetNumConnections(); out > 0; --out) + { + InputSlot *childIn = deletedLayer->GetOutputSlot(in - 1).GetConnection(out -1); + deletedLayer->GetOutputSlot(in - 1).Disconnect(*childIn); + parentOut->Connect(*childIn); + } + } + optGraph.EraseLayer(deletedLayer); + } + } + if (!optimizationViews.GetFailedSubgraphs().empty()) { std::stringstream warningMsg; diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp index 7a1b689b2c..4629bf245e 100644 --- a/src/armnn/layers/ConcatLayer.cpp +++ b/src/armnn/layers/ConcatLayer.cpp @@ -120,7 +120,7 @@ void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry, // 3) the input does not come from a Constant layer or input layer // 4) the input is only read by this concat layer // 5) if concat along x or y (2 innermost dimensions) and the previous layers do not require padding - // 6) the input does not have an Overridden TensorInfo + // 6) neither the inputs nor the output have an Overridden TensorInfo if (slot && parentInfo.IsTypeSpaceMatch(info) && //(1) factoryId == slot->GetTensorHandleFactoryId() && //(2) @@ -128,6 +128,7 @@ void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry, slot->GetOwningLayer().GetType() != LayerType::Input && //(3) slot->GetNumConnections() == 1 && canUseSubTensorOnXorY && //(5) + !GetOutputSlot(0).GetConnection(0)->IsTensorInfoOverridden() && //(6) !currentLayer->GetInputSlot(i).IsTensorInfoOverridden()) //(6) { ARMNN_NO_DEPRECATE_WARN_BEGIN diff --git a/src/armnn/layers/ReverseV2Layer.cpp b/src/armnn/layers/ReverseV2Layer.cpp index 29f8b1b781..201e19819b 100644 --- a/src/armnn/layers/ReverseV2Layer.cpp +++ b/src/armnn/layers/ReverseV2Layer.cpp @@ -40,7 +40,7 @@ void ReverseV2Layer::ValidateTensorShapesFromInputs() VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod); auto inferredShapes = InferOutputShapes({ - GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() }); + GetInputSlot(0).GetTensorInfo().GetShape() }); ARMNN_ASSERT(inferredShapes.size() == 1); diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp index 86a42305ff..dc8864a736 100644 --- a/src/armnn/layers/SplitterLayer.cpp +++ b/src/armnn/layers/SplitterLayer.cpp @@ -131,13 +131,14 @@ void SplitterLayer::CreateTensors(const TensorHandleFactoryRegistry& registry, // 2) the same TensorHandleFactory is used for input and split layer output // 3) the output does not go to a Constant layer or input layer // 4) if split along x or y (2 innermost dimensions) and the next layers do not require padding - // 5) none of the outputs have an Overridden TensorInfo + // 5) neither the input nor the outputs have an Overridden TensorInfo if (parentInfo.IsTypeSpaceMatch(info) && //(1) factoryId == slot->GetTensorHandleFactoryId() && //(2) GetOutputSlot(i).GetConnection(0)->GetOwningLayer().GetType() != LayerType::Constant && //(3) GetOutputSlot(i).GetConnection(0)->GetOwningLayer().GetType() != LayerType::Input && //(3) canUseSubTensorOnXorY && //(4) - !GetOutputSlot(i).GetConnection(0)->IsTensorInfoOverridden()) //(5) + 
!GetOutputSlot(i).GetConnection(0)->IsTensorInfoOverridden() && //(5) + !GetInputSlot(0).IsTensorInfoOverridden()) //(5) { ARMNN_NO_DEPRECATE_WARN_BEGIN return factory.CreateSubTensorHandle(*inputData, diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp index e940bd69ec..d125ef56dd 100644 --- a/src/armnnDeserializer/Deserializer.cpp +++ b/src/armnnDeserializer/Deserializer.cpp @@ -1156,6 +1156,13 @@ void IDeserializer::DeserializerImpl::RegisterInputSlots(GraphPtr graph, auto fbInputSlot = baseLayer->inputSlots()->Get(i); auto fbConnection = fbInputSlot->connection(); armnn::IInputSlot* inputSlot = &(layer->GetInputSlot(fbInputSlot->index())); + + // If the slot has an Overridden tensorInfo then extract it + if (fbInputSlot->isOverridden()) + { + armnn::TensorInfo overriddenTensorInfo = ToTensorInfo(fbInputSlot->overriddenTensorInfo()); + inputSlot->SetTensorInfo(overriddenTensorInfo); + } RegisterInputSlotOfConnection(fbConnection->sourceLayerIndex(), fbConnection->outputSlotIndex(), inputSlot); } } diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs index 76a1c12787..995a6013c1 100644 --- a/src/armnnSerializer/ArmnnSchema.fbs +++ b/src/armnnSerializer/ArmnnSchema.fbs @@ -106,6 +106,8 @@ table ConstTensor { table InputSlot { index:uint; connection:Connection; + isOverridden:bool; + overriddenTensorInfo:TensorInfo; } table OutputSlot { diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp index cf098eb5f8..e10b66f51d 100644 --- a/src/armnnSerializer/Serializer.cpp +++ b/src/armnnSerializer/Serializer.cpp @@ -1974,12 +1974,16 @@ std::vector> // Get the Connection for the InputSlot const IOutputSlot* connection = inputSlot.GetConnection(); + bool isOverridden = inputSlot.IsTensorInfoOverridden(); + + flatbuffers::Offset overriddenTensorInfo = CreateTensorInfo(inputSlot.GetTensorInfo()); // Create FlatBuffer Connection serializer::Connection conn(GetSerializedId(inputSlot.GetConnection()->GetOwningLayerGuid()), connection->CalculateIndexOnOwner()); // Create FlatBuffer InputSlot - inputSlots.push_back(serializer::CreateInputSlot(m_flatBufferBuilder, slotIndex, &conn)); + inputSlots.push_back(serializer::CreateInputSlot(m_flatBufferBuilder, slotIndex, &conn, isOverridden, + overriddenTensorInfo)); } return inputSlots; } diff --git a/src/armnnSerializer/test/SerializerTestUtils.cpp b/src/armnnSerializer/test/SerializerTestUtils.cpp index 187384777d..c0f90f9edf 100644 --- a/src/armnnSerializer/test/SerializerTestUtils.cpp +++ b/src/armnnSerializer/test/SerializerTestUtils.cpp @@ -49,7 +49,7 @@ void LayerVerifierBase::VerifyNameAndConnections(const armnn::IConnectableLayer* const armnn::IOutputSlot* connectedOutput = layer->GetInputSlot(i).GetConnection(); CHECK(connectedOutput); - const armnn::TensorInfo& connectedInfo = connectedOutput->GetTensorInfo(); + const armnn::TensorInfo& connectedInfo = layer->GetInputSlot(i).GetTensorInfo(); CHECK(connectedInfo.GetShape() == m_InputTensorInfos[i].GetShape()); CHECK(GetDataTypeName(connectedInfo.GetDataType()) == GetDataTypeName(m_InputTensorInfos[i].GetDataType())); diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp index 7a988ee134..49971d2ecc 100644 --- a/src/armnnSerializer/test/SerializerTests.cpp +++ b/src/armnnSerializer/test/SerializerTests.cpp @@ -3008,4 +3008,34 @@ TEST_CASE("SerializeDeserializeNonLinearNetwork") deserializedNetwork->ExecuteStrategy(verifier); } 
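The serializer change above writes inputSlot.GetTensorInfo() unconditionally; this is safe because an InputSlot falls back to the connected OutputSlot's TensorInfo when no override has been set, and the isOverridden flag tells the deserializer whether to re-apply the value. A minimal sketch of that fallback, for context only (this is not an excerpt from the patch, and the member name m_OverriddenTensorInfo is an assumption for illustration):

// Sketch only: the documented fallback behaviour of InputSlot::GetTensorInfo().
const TensorInfo& InputSlot::GetTensorInfo() const
{
    // Assumed member for this sketch; the real field name may differ.
    if (m_OverriddenTensorInfo.has_value())
    {
        return m_OverriddenTensorInfo.value();            // override set via SetTensorInfo()
    }
    return GetConnectedOutputSlot()->GetTensorInfo();     // fall back to the producer's info
}

The SerializeOverriddenSlot test that follows exercises exactly this round-trip through the serializer.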
+TEST_CASE("SerializeOverriddenSlot") +{ + const std::string layerName("subtraction"); + const armnn::TensorInfo info({ 1, 4 }, armnn::DataType::Float32); + const armnn::TensorInfo incompatibleInfo({ 4, 1 }, armnn::DataType::Float32); + + armnn::INetworkPtr network = armnn::INetwork::Create(); + armnn::IConnectableLayer* const inputLayer0 = network->AddInputLayer(0); + armnn::IConnectableLayer* const inputLayer1 = network->AddInputLayer(1); + armnn::IConnectableLayer* const subtractionLayer = network->AddElementwiseBinaryLayer(armnn::BinaryOperation::Sub, + layerName.c_str()); + armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); + + inputLayer0->GetOutputSlot(0).Connect(subtractionLayer->GetInputSlot(0)); + inputLayer1->GetOutputSlot(0).Connect(subtractionLayer->GetInputSlot(1)); + subtractionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + inputLayer0->GetOutputSlot(0).SetTensorInfo(info); + inputLayer1->GetOutputSlot(0).SetTensorInfo(incompatibleInfo); + subtractionLayer->GetInputSlot(1).SetTensorInfo(info); + subtractionLayer->GetOutputSlot(0).SetTensorInfo(info); + + armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network)); + CHECK(deserializedNetwork); + + LayerVerifierBase verifier(layerName, {info, info}, {info}); + deserializedNetwork->ExecuteStrategy(verifier); +} + + } diff --git a/src/armnnTestUtils/TestUtils.hpp b/src/armnnTestUtils/TestUtils.hpp index 9fea61b264..acd0d1bce4 100644 --- a/src/armnnTestUtils/TestUtils.hpp +++ b/src/armnnTestUtils/TestUtils.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -12,6 +12,24 @@ void Connect(armnn::IConnectableLayer* from, armnn::IConnectableLayer* to, const armnn::TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0); +class LayerNameAndTypeCheck +{ +public: + LayerNameAndTypeCheck(armnn::LayerType layerType, const char* name) + : m_layerType(layerType) + , m_name(name) + {} + + bool operator()(const armnn::Layer* const layer) + { + return (layer->GetNameStr() == m_name && + layer->GetType() == m_layerType); + } +private: + armnn::LayerType m_layerType; + const char* m_name; +}; + template bool IsLayerOfType(const armnn::Layer* const layer) { diff --git a/src/backends/backendsCommon/OptimizationViews.cpp b/src/backends/backendsCommon/OptimizationViews.cpp index d887cbc93c..a358f88520 100644 --- a/src/backends/backendsCommon/OptimizationViews.cpp +++ b/src/backends/backendsCommon/OptimizationViews.cpp @@ -40,6 +40,12 @@ bool OptimizationViews::Validate(const armnn::SubgraphView& originalSubgraph) co successful.m_SubstitutableSubgraph.GetIConnectableLayers().begin(), successful.m_SubstitutableSubgraph.GetIConnectableLayers().end()); } + for (auto& successful : m_DeletedSubgraphs) + { + countedLayers.insert(countedLayers.end(), + successful.GetIConnectableLayers().begin(), + successful.GetIConnectableLayers().end()); + } countedLayers.sort(); // Compare the two lists to make sure they match @@ -58,7 +64,7 @@ bool OptimizationViews::Validate(const armnn::SubgraphView& originalSubgraph) co for (auto& substitution : m_SuccesfulOptimizations) { bool validSubstitution = true; - const SubgraphView& replacement = substitution.m_ReplacementSubgraph; + const SubgraphView &replacement = substitution.m_ReplacementSubgraph; const SubgraphView& old = substitution.m_SubstitutableSubgraph; validSubstitution &= 
replacement.GetIInputSlots().size() == old.GetIInputSlots().size(); validSubstitution &= replacement.GetIOutputSlots().size() == old.GetIOutputSlots().size(); diff --git a/src/backends/backendsCommon/SubgraphUtils.hpp b/src/backends/backendsCommon/SubgraphUtils.hpp index bd3d698a98..ade4b63976 100644 --- a/src/backends/backendsCommon/SubgraphUtils.hpp +++ b/src/backends/backendsCommon/SubgraphUtils.hpp @@ -1,10 +1,12 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once +#include +#include #include namespace armnn @@ -13,6 +15,118 @@ namespace armnn namespace { +/// Checks if a Layer has a DataLayout that is either NCHW or NCDHW. +class CheckForNCHW : public StrategyBase +{ +public: + CheckForNCHW() + {} + + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const armnn::BaseDescriptor& descriptor, + const std::vector& constants, + const char* name, + const armnn::LayerBindingId id = 0) override + { + armnn::IgnoreUnused(layer, constants, id, name); + switch (layer->GetType()) + { + case armnn::LayerType::BatchMatMul: + { + auto desc = static_cast(descriptor); + m_Result = desc.m_DataLayoutX == DataLayout::NCHW || desc.m_DataLayoutY == DataLayout::NCHW; + break; + } + case armnn::LayerType::BatchNormalization: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::BatchToSpaceNd: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::Convolution2d: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::Convolution3d: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::DepthwiseConvolution2d: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::InstanceNormalization: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::L2Normalization: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::Normalization: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::Pooling2d: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::Pooling3d: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::SpaceToBatchNd: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::SpaceToDepth: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + case armnn::LayerType::StridedSlice: + { + CheckDescForNCHW(static_cast(descriptor)); + break; + } + default: + { + m_Result = false; + } + } + } + + /// Returns true if the Layer had a DataLayout and it was NCHW or NCDHW. + /// Returns false if the Layer either doesn't have a DataLayout or if it + /// had a DataLayout that was neither NCHW nor NCDHW. 
+ bool Result() + { + return m_Result; + } + +private: + template + void CheckDescForNCHW(const Descriptor& descriptor) + { + m_Result = (descriptor.m_DataLayout == DataLayout::NCHW) || (descriptor.m_DataLayout == DataLayout::NCDHW); + } + + bool m_Result = false; +}; + // // this helper only works if all layers where the inputs connect to are not selected // @@ -49,6 +163,13 @@ SubgraphView::IOutputSlots CreateIOutputsFrom(const std::vector untouched) { std::vector untouchedVector; @@ -78,22 +199,69 @@ LayerType* FoldPadLayer(OptimizationViews& optimizationViews, return replacementLayer; } +inline void RemoveReshapeLayer(ReshapeLayer* baseLayer, + std::map& untouched, + OptimizationViews& optimizationViews) +{ + if (baseLayer == nullptr) + { + return; + } + ReshapeDescriptor reshapeDescriptor = baseLayer->GetParameters(); + Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); + + // Cannot currently remove the Reshape if it's connected to an Input, Constant or Splitter + if (parentLayer.GetType() == LayerType::Input || parentLayer.GetType() == LayerType::Constant) + { + return; + } + + // Cannot currently remove the Reshape if it's connected to an OutputSlot or Concat + for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i) + { + Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer(); + + if (nextLayer.GetType() == LayerType::Output) + { + return; + } + } + auto it = untouched.find(baseLayer->GetGuid()); + if (it == untouched.end()) + { + // Already removed from map + return; + } + untouched.erase(it); + + // Override the InputSlot TensorInfos for all the layers connected to the Reshape's OutputSlot + for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i) + { + Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer(); + auto inputIndex = baseLayer->GetOutputSlot(0).GetConnection(i)->GetSlotIndex(); + TensorInfo reshapeInfo(baseLayer->GetOutputSlot(0).GetTensorInfo()); + reshapeInfo.SetShape(reshapeDescriptor.m_TargetShape); + nextLayer.GetInputSlot(inputIndex).SetTensorInfo(reshapeInfo); + } + optimizationViews.AddDeletedSubgraph(baseLayer); +} + template LayerType* FoldPadIntoAveragePool2d(OptimizationViews& optimizationViews, Pooling2dLayer* baseLayer, Pooling2dDescriptor& poolDescriptor, PadLayer* padLayer) { - IConnectableLayer* replacement = - optimizationViews.GetINetwork()->AddPooling2dLayer(poolDescriptor, "folded-pad-into-pool2d"); - LayerType* replacementLayer = PolymorphicDowncast(replacement); + IConnectableLayer* replacement = + optimizationViews.GetINetwork()->AddPooling2dLayer(poolDescriptor, "folded-pad-into-pool2d"); + LayerType* replacementLayer = PolymorphicDowncast(replacement); - FoldPadLayer(optimizationViews, - baseLayer, - replacementLayer, - padLayer); + FoldPadLayer(optimizationViews, + baseLayer, + replacementLayer, + padLayer); - return replacementLayer; + return replacementLayer; } } // namespace armnn diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index 37f9382d6e..ac4bcc90f6 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -1122,7 +1122,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, case LayerType::ReverseV2: { auto cLayer = PolymorphicDowncast(&layer); - const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const 
TensorInfo& input = layer.GetInputSlot(0).GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); result = layerSupportObject.IsReverseV2Supported(OverrideDataType(input, dataType), OverrideDataType(output, dataType), @@ -1413,9 +1413,9 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, // All inputs. const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetTensorInfo(), dataType); - const TensorInfo& outputStateIn = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), + const TensorInfo& outputStateIn = OverrideDataType(layer.GetInputSlot(1).GetTensorInfo(), dataType); - const TensorInfo& cellStateIn = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), + const TensorInfo& cellStateIn = OverrideDataType(layer.GetInputSlot(2).GetTensorInfo(), dataType); // Outputs const TensorInfo& outputStateOut = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index 0139044432..8d6891a68d 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -59,6 +59,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources SpaceToDepthEndToEndTestImpl.hpp SplitterEndToEndTestImpl.hpp StridedSliceAsyncEndToEndTest.hpp + SubgraphUtilsTest.hpp SubtractionEndToEndTestImpl.hpp TransposeEndToEndTestImpl.hpp TensorCopyUtils.hpp diff --git a/src/backends/backendsCommon/test/SubgraphUtilsTest.hpp b/src/backends/backendsCommon/test/SubgraphUtilsTest.hpp new file mode 100644 index 0000000000..957726797b --- /dev/null +++ b/src/backends/backendsCommon/test/SubgraphUtilsTest.hpp @@ -0,0 +1,399 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include "backendsCommon/SubgraphUtils.hpp" + +namespace armnn +{ + +template, typename TOutput = ResolveType> +void EndToEndLayerTest(IRuntimePtr runtime, + IOptimizedNetworkPtr optNet, + const std::map>& inputTensorData, + const std::map>& expectedOutputData, + float tolerance = 0.000001f) +{ + // Loads it into the runtime. + NetworkId netId; + std::string errorMessage; + armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage); + CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage); + + InputTensors inputTensors; + inputTensors.reserve(inputTensorData.size()); + for (auto&& it : inputTensorData) + { + inputTensors.push_back({it.first, + ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())}); + } + OutputTensors outputTensors; + outputTensors.reserve(expectedOutputData.size()); + std::map> outputStorage; + for (auto&& it : expectedOutputData) + { + std::vector out(it.second.size()); + outputStorage.emplace(it.first, out); + outputTensors.push_back({it.first, + Tensor(runtime->GetOutputTensorInfo(netId, it.first), + outputStorage.at(it.first).data())}); + } + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. + for (auto&& it : expectedOutputData) + { + std::vector out = outputStorage.at(it.first); + for (unsigned int i = 0; i < out.size(); ++i) + { + CHECK_MESSAGE(Compare(it.second[i], out[i], tolerance) == true, + "Actual output: " << out[i] << ". 
Expected output:" << it.second[i]); + + } + } +} + +template> +armnn::INetworkPtr CreateReshapeInOutNetwork(const armnn::TensorShape& inputShape, + const armnn::TensorShape& outputShape, + ReshapeDescriptor& descriptor, + const float qScale = 1.0f, + const int32_t qOffset = 0) +{ + armnn::INetworkPtr network(armnn::INetwork::Create()); + + TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset, true); + TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset); + + IConnectableLayer* activation0 = network->AddActivationLayer(ActivationFunction::ReLu, "act0"); + IConnectableLayer* activation1 = network->AddActivationLayer(ActivationFunction::ReLu, "act1"); + IConnectableLayer* activation2 = network->AddActivationLayer(ActivationFunction::ReLu, "act2"); + IConnectableLayer* activation3 = network->AddActivationLayer(ActivationFunction::ReLu, "act3"); + IConnectableLayer* reshape = network->AddReshapeLayer(descriptor, "reshape"); + + IConnectableLayer* input = network->AddInputLayer(0, "input"); + IConnectableLayer* output1 = network->AddOutputLayer(0, "output1"); + IConnectableLayer* output2 = network->AddOutputLayer(1, "output2"); + IConnectableLayer* output3 = network->AddOutputLayer(2, "output3"); + + Connect(input, activation0, inputTensorInfo, 0, 0); + Connect(activation0, reshape, inputTensorInfo, 0, 0); + + Connect(reshape, activation1, outputTensorInfo, 0, 0); + Connect(reshape, activation2, outputTensorInfo, 0, 0); + Connect(reshape, activation3, outputTensorInfo, 0, 0); + Connect(activation1, output1, outputTensorInfo, 0, 0); + Connect(activation2, output2, outputTensorInfo, 0, 0); + Connect(activation3, output3, outputTensorInfo, 0, 0); + + return network; +} + +template> +armnn::INetworkPtr CreateReshapeConv2dInOutNetwork(const armnn::TensorShape& inputShape, + const armnn::TensorShape& weightsShape, + const armnn::TensorShape& convOutputShape, + const armnn::TensorShape& outputShape, + std::vector& weightsData, + ReshapeDescriptor& descriptor, + Convolution2dDescriptor& convolution2DDescriptor, + bool convFirst, + const float qScale = 1.0f, + const int32_t qOffset = 0) +{ + armnn::INetworkPtr network(armnn::INetwork::Create()); + TensorInfo weightsTensorInfo(weightsShape, ArmnnType, qScale, qOffset, true); + ConstTensor weights(weightsTensorInfo, weightsData); + + IConnectableLayer* convolution1 = network->AddConvolution2dLayer(convolution2DDescriptor, "conv2d"); + IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "weights"); + + IConnectableLayer* activation1 = network->AddActivationLayer(ActivationFunction::ReLu, "act"); + IConnectableLayer* reshape = network->AddReshapeLayer(descriptor, "reshape"); + + IConnectableLayer* input = network->AddInputLayer(0, "input"); + IConnectableLayer* output = network->AddOutputLayer(0, "output"); + + TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset, true); + TensorInfo convTensorInfo(convOutputShape, ArmnnType, qScale, qOffset, true); + TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset); + TensorInfo reshapeTensorInfo(descriptor.m_TargetShape, ArmnnType, qScale, qOffset, true); + + if (convFirst) + { + Connect(input, convolution1, inputTensorInfo, 0, 0); + Connect(weightsLayer, convolution1, weightsTensorInfo, 0, 1); + Connect(convolution1, reshape, convTensorInfo, 0, 0); + Connect(reshape, activation1, reshapeTensorInfo, 0, 0); + Connect(activation1, output, outputTensorInfo, 0, 0); + } + else + { + Connect(input, activation1, inputTensorInfo, 0, 0); + 
Connect(activation1, reshape, inputTensorInfo, 0, 0); + Connect(reshape, convolution1, reshapeTensorInfo, 0, 0); + Connect(weightsLayer, convolution1, weightsTensorInfo, 0, 1); + Connect(convolution1, output, outputTensorInfo, 0, 0); + } + return network; +} + +template> +void ReshapeRemovalEndToEnd(const std::vector& backends) +{ + using namespace armnn; + + const TensorShape& inputShape = { 2, 3 }; + const TensorShape& outputShape = { 6 }; + + ReshapeDescriptor descriptor; + descriptor.m_TargetShape = outputShape; + + INetworkPtr network = CreateReshapeInOutNetwork(inputShape, outputShape, descriptor); + + CHECK(network); + + std::vector data{ 1, 2, 3, + 4, 5, 6 }; + + std::map> inputTensorData = { { 0, data } }; + std::map> expectedOutputData = { { 0, data }, { 1, data }, { 2, data } }; + + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec()); + + Graph& graph = GetGraphForTesting(optNet.get()); + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + LayerNameAndTypeCheck(LayerType::Input, "input"), + LayerNameAndTypeCheck(LayerType::Activation, "act0"), + LayerNameAndTypeCheck(LayerType::Activation, "act1"), + LayerNameAndTypeCheck(LayerType::Activation, "act2"), + LayerNameAndTypeCheck(LayerType::Activation, "act3"), + LayerNameAndTypeCheck(LayerType::Output, "output1"), + LayerNameAndTypeCheck(LayerType::Output, "output2"), + LayerNameAndTypeCheck(LayerType::Output, "output3"))); + + EndToEndLayerTest(std::move(runtime), std::move(optNet), inputTensorData, expectedOutputData); +} + +template> +void ReshapeRemovalNCHWEndToEnd(const std::vector& backends, bool shouldBeRemoved, bool convFirst) +{ + using namespace armnn; + + // shapes are different if convFirst or not + //these are convFirst + TensorShape inputShape; + TensorShape convOutputShape; + TensorShape weightsShape; + TensorShape reshapeShape; + TensorShape outputShape; + + if (convFirst) + { + inputShape = { 1, 1, 5, 5 }; + convOutputShape = { 1, 1, 3, 3 }; + weightsShape = { 1, 1, 3, 3 }; + reshapeShape = { 9 }; + outputShape = { 9 }; + } + else + { + inputShape = { 5, 5 }; + reshapeShape = { 1, 1, 5, 5 }; + convOutputShape = { 1, 1, 3, 3 }; + weightsShape = { 1, 1, 3, 3 }; + outputShape = { 1, 1, 3, 3 }; + } + + ReshapeDescriptor descriptor; + descriptor.m_TargetShape = reshapeShape; + + Convolution2dDescriptor convolution2DDescriptor; + convolution2DDescriptor.m_PadLeft = 0; + convolution2DDescriptor.m_PadRight = 0; + convolution2DDescriptor.m_PadTop = 0; + convolution2DDescriptor.m_PadBottom = 0; + convolution2DDescriptor.m_StrideX = 1; + convolution2DDescriptor.m_StrideY = 1; + convolution2DDescriptor.m_DataLayout = DataLayout::NCHW; + convolution2DDescriptor.m_BiasEnabled = false; + + TensorInfo inputInfo(inputShape, DataType::Float32, true); + TensorInfo outputInfo(convOutputShape, DataType::Float32); + TensorInfo weightsInfo(weightsShape, DataType::Float32, true); + + std::vector inputData = + { + 1.0f, 8.0f, 3.0f, 4.0f, 6.0f, + 5.0f, 7.0f, 3.0f, 1.0f, 8.0f, + 2.0f, 3.0f, 9.0f, 8.0f, 1.0f, + 3.0f, 6.0f, 1.0f, 1.0f, 9.0f, + 5.0f, 3.0f, 9.0f, 3.0f, 2.0f + }; + + std::vector weightsData = + { + 4.0f, 0.0f, 3.0f, + 5.0f, 0.0f, 2.0f, + 6.0f, 0.0f, 1.0f + }; + + std::vector outputData = + { + 65.0f, 107.0f, 116.0f, + 76.0f, 99.0f, 98.0f, + 91.0f, 89.0f, 118.0f + }; + + INetworkPtr network = CreateReshapeConv2dInOutNetwork(inputShape, + 
weightsShape, + convOutputShape, + outputShape, + weightsData, + descriptor, + convolution2DDescriptor, + convFirst); + CHECK(network); + + std::map> inputTensorData = { { 0, inputData } }; + std::map> expectedOutputData = { { 0, outputData } }; + + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec()); + + Graph& graph = GetGraphForTesting(optNet.get()); + + if (shouldBeRemoved) + { + if (convFirst) + { + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + LayerNameAndTypeCheck(LayerType::Input, "input"), + LayerNameAndTypeCheck(LayerType::Constant, "weights"), + LayerNameAndTypeCheck(LayerType::Convolution2d, "conv2d"), + LayerNameAndTypeCheck(LayerType::Activation, "act"), + LayerNameAndTypeCheck(LayerType::Output, "output"))); + } + else + { + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + LayerNameAndTypeCheck(LayerType::Input, "input"), + LayerNameAndTypeCheck(LayerType::Constant, "weights"), + LayerNameAndTypeCheck(LayerType::Activation, "act"), + LayerNameAndTypeCheck(LayerType::Convolution2d, "conv2d"), + LayerNameAndTypeCheck(LayerType::Output, "output"))); + } + } + else + { + if (convFirst) + { + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + LayerNameAndTypeCheck(LayerType::Input, "input"), + LayerNameAndTypeCheck(LayerType::Constant, "weights"), + LayerNameAndTypeCheck(LayerType::Convolution2d, "conv2d"), + LayerNameAndTypeCheck(LayerType::Reshape, "reshape"), + LayerNameAndTypeCheck(LayerType::Activation, "act"), + LayerNameAndTypeCheck(LayerType::Output, "output"))); + } + else + { + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + LayerNameAndTypeCheck(LayerType::Input, "input"), + LayerNameAndTypeCheck(LayerType::Constant, "weights"), + LayerNameAndTypeCheck(LayerType::Activation, "act"), + LayerNameAndTypeCheck(LayerType::Reshape, "reshape"), + LayerNameAndTypeCheck(LayerType::Convolution2d, "conv2d"), + LayerNameAndTypeCheck(LayerType::Output, "output"))); + } + } + + EndToEndLayerTest(std::move(runtime), std::move(optNet), inputTensorData, expectedOutputData); +} + +template +void CheckIsNCHW() +{ + Graph graph; + Descriptor nchwDesc; + nchwDesc.m_DataLayout = DataLayout::NCHW; + auto nchwLayer = graph.AddLayer(nchwDesc, ""); + CHECK(IsNCHW(*nchwLayer)); + + Descriptor nhwcDesc; + nhwcDesc.m_DataLayout = DataLayout::NHWC; + auto nhwcLayer = graph.AddLayer(nhwcDesc, ""); + CHECK_FALSE(IsNCHW(*nhwcLayer)); +} + +TEST_CASE("CheckIsNCHW") +{ + Graph graph; + BatchMatMulDescriptor descriptor1; + descriptor1.m_DataLayoutX = DataLayout::NHWC; + descriptor1.m_DataLayoutY = DataLayout::NHWC; + auto batchMatMulLayer1 = graph.AddLayer(descriptor1, ""); + CHECK_FALSE(IsNCHW(*batchMatMulLayer1)); + + BatchMatMulDescriptor descriptor2; + descriptor2.m_DataLayoutX = DataLayout::NCHW; + descriptor2.m_DataLayoutY = DataLayout::NHWC; + auto batchMatMulLayer2 = graph.AddLayer(descriptor2, ""); + CHECK(IsNCHW(*batchMatMulLayer2)); + + BatchMatMulDescriptor descriptor3; + descriptor3.m_DataLayoutX = DataLayout::NHWC; + descriptor3.m_DataLayoutY = DataLayout::NCHW; + auto batchMatMulLayer3 = graph.AddLayer(descriptor3, ""); + CHECK(IsNCHW(*batchMatMulLayer3)); + + BatchMatMulDescriptor descriptor4; + descriptor4.m_DataLayoutX = DataLayout::NCHW; + descriptor4.m_DataLayoutY = DataLayout::NCHW; + auto batchMatMulLayer4 = graph.AddLayer(descriptor4, ""); + CHECK(IsNCHW(*batchMatMulLayer4)); + + 
CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + CheckIsNCHW(); + + // Check Default + auto elementwiseLayer = graph.AddLayer(BinaryOperation::Add, ""); + CHECK_FALSE(IsNCHW(*elementwiseLayer)); +} + + +} // Namespace diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt index 20c42061fc..cc71069910 100644 --- a/src/backends/cl/CMakeLists.txt +++ b/src/backends/cl/CMakeLists.txt @@ -39,6 +39,7 @@ if(ARMCOMPUTECL) ClLayerSupport.cpp ClLayerSupport.hpp ClRegistryInitializer.cpp + ClTensorHandle.cpp ClTensorHandle.hpp ClTensorHandleFactory.cpp ClTensorHandleFactory.hpp diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index a10b6fbb43..b018654288 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -455,6 +455,7 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma); replacementLayer->m_Mean = std::move(baseLayer->m_Mean); replacementLayer->m_Variance = std::move(baseLayer->m_Variance); + untouched.erase(baseLayer->GetGuid()); untouched.erase(activationLayer->GetGuid()); } @@ -476,6 +477,7 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, activationLayer, activationDesc, name); + untouched.erase(baseLayer->GetGuid()); untouched.erase(activationLayer->GetGuid()); } @@ -623,6 +625,8 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, activationDesc, BinaryOperation::Sub, name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); } } // No fusion available for other BinaryOperations @@ -678,7 +682,7 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, } } - if (optimizationViews.GetSubstitutions().empty()) + if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty()) { optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); } diff --git a/src/backends/cl/ClTensorHandle.cpp b/src/backends/cl/ClTensorHandle.cpp new file mode 100644 index 0000000000..ccc8f6effc --- /dev/null +++ b/src/backends/cl/ClTensorHandle.cpp @@ -0,0 +1,82 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClTensorHandle.hpp" + +#include + +namespace armnn +{ + std::shared_ptr ClTensorHandle::DecorateTensorHandle(const TensorInfo& tensorInfo) + { + auto* parent = const_cast(this); + auto decorated = std::make_shared(parent, tensorInfo); + m_Decorated.emplace_back(decorated); + return decorated; + } + + ClTensorDecorator::ClTensorDecorator() + : m_Original(nullptr), m_TensorInfo() + { + } + + ClTensorDecorator::ClTensorDecorator(arm_compute::ICLTensor* original, const TensorInfo& tensorInfo) + : m_Original(nullptr), m_TensorInfo() + { + m_TensorInfo = armcomputetensorutils::BuildArmComputeTensorInfo(tensorInfo); + m_Original = original; + } + + arm_compute::ITensorInfo* ClTensorDecorator::info() const + { + return &m_TensorInfo; + } + + arm_compute::ITensorInfo* ClTensorDecorator::info() + { + return &m_TensorInfo; + } + + const cl::Buffer& ClTensorDecorator::cl_buffer() const + { + ARM_COMPUTE_ERROR_ON(m_Original == nullptr); + return m_Original->cl_buffer(); + } + + arm_compute::ICLTensor* ClTensorDecorator::parent() + { + return nullptr; + } + + arm_compute::CLQuantization ClTensorDecorator::quantization() const + { + return m_Original->quantization(); + } + + void ClTensorDecorator::map(bool blocking) + { + arm_compute::ICLTensor::map(arm_compute::CLScheduler::get().queue(), blocking); + } + + void ClTensorDecorator::unmap() + { + arm_compute::ICLTensor::unmap(arm_compute::CLScheduler::get().queue()); + } + + uint8_t* ClTensorDecorator::do_map(cl::CommandQueue& q, bool blocking) + { + if(m_Original->buffer() == nullptr) + { + m_Original->map(q, blocking); + } + return m_Original->buffer(); + } + + void ClTensorDecorator::do_unmap(cl::CommandQueue& q) + { + m_Original->unmap(q); + } + +} \ No newline at end of file diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp index 3d750f9059..42657341fd 100644 --- a/src/backends/cl/ClTensorHandle.hpp +++ b/src/backends/cl/ClTensorHandle.hpp @@ -1,7 +1,8 @@ // -// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // + #pragma once #include @@ -22,6 +23,7 @@ namespace armnn { +class ClTensorHandleDecorator; class ClTensorHandle : public IClTensorHandle { @@ -122,7 +124,7 @@ public: virtual bool Import(void* memory, MemorySource source) override { armnn::IgnoreUnused(memory); - if (m_ImportFlags & static_cast(source)) + if (m_ImportFlags& static_cast(source)) { throw MemoryImportException("ClTensorHandle::Incorrect import flag"); } @@ -137,6 +139,8 @@ public: return false; } + virtual std::shared_ptr DecorateTensorHandle(const TensorInfo& tensorInfo) override; + private: // Only used for testing void CopyOutTo(void* memory) const override @@ -227,6 +231,7 @@ private: MemorySourceFlags m_ImportFlags; bool m_Imported; bool m_IsImportEnabled; + std::vector> m_Decorated; }; class ClSubTensorHandle : public IClTensorHandle @@ -361,4 +366,179 @@ private: ITensorHandle* parentHandle = nullptr; }; +/** ClTensorDecorator wraps an existing CL tensor allowing us to override the TensorInfo for it */ +class ClTensorDecorator : public arm_compute::ICLTensor +{ +public: + ClTensorDecorator(); + + ClTensorDecorator(arm_compute::ICLTensor* original, const TensorInfo& info); + + ~ClTensorDecorator() = default; + + ClTensorDecorator(const ClTensorDecorator&) = delete; + + ClTensorDecorator& operator=(const ClTensorDecorator&) = delete; + + ClTensorDecorator(ClTensorDecorator&&) = default; + + ClTensorDecorator& operator=(ClTensorDecorator&&) = default; + + arm_compute::ICLTensor* parent(); + + void map(bool blocking = true); + using arm_compute::ICLTensor::map; + + void unmap(); + using arm_compute::ICLTensor::unmap; + + virtual arm_compute::ITensorInfo* info() const override; + virtual arm_compute::ITensorInfo* info() override; + const cl::Buffer& cl_buffer() const override; + arm_compute::CLQuantization quantization() const override; + +protected: + // Inherited methods overridden: + uint8_t* do_map(cl::CommandQueue& q, bool blocking) override; + void do_unmap(cl::CommandQueue& q) override; + +private: + arm_compute::ICLTensor* m_Original; + mutable arm_compute::TensorInfo m_TensorInfo; +}; + +class ClTensorHandleDecorator : public IClTensorHandle +{ +public: + ClTensorHandleDecorator(IClTensorHandle* parent, const TensorInfo& info) + : m_Tensor(&parent->GetTensor(), info) + { + m_OriginalHandle = parent; + } + + arm_compute::ICLTensor& GetTensor() override { return m_Tensor; } + arm_compute::ICLTensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual const void* Map(bool blocking = true) const override + { + m_Tensor.map(blocking); + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + + virtual void Unmap() const override + { + m_Tensor.unmap(); + } + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr&) override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + // Only used for testing + void CopyOutTo(void* memory) const override + { + const_cast(this)->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + 
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + const_cast(this)->Unmap(); + } + + // Only used for testing + void CopyInFrom(const void* memory) override + { + this->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + this->Unmap(); + } + + mutable ClTensorDecorator m_Tensor; + IClTensorHandle* m_OriginalHandle = nullptr; +}; + } // namespace armnn diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 03f1a9540d..f4b9fac740 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -23,6 +23,7 @@ BACKEND_SOURCES := \ ClImportTensorHandleFactory.cpp \ ClLayerSupport.cpp \ ClRegistryInitializer.cpp \ + ClTensorHandle.cpp \ ClTensorHandleFactory.cpp \ ClWorkloadFactory.cpp \ OpenClTimer.cpp \ diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp index 091526fd2b..6342cbc48a 100644 --- a/src/backends/cl/test/ClEndToEndTests.cpp +++ b/src/backends/cl/test/ClEndToEndTests.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -60,18 +61,18 @@ TEST_CASE("ClAdditionEndToEndUint8Test") } // Power -TEST_CASE("RefPowerEndToEndTestFloat32") +TEST_CASE("ClPowerEndToEndTestFloat32") { ElementwiseBinarySimpleEndToEnd(clDefaultBackends, BinaryOperation::Power); } // SqDiff -TEST_CASE("RefSquaredDifferenceEndToEndTestFloat32") +TEST_CASE("ClSquaredDifferenceEndToEndTestFloat32") { ElementwiseBinarySimpleEndToEnd(clDefaultBackends, BinaryOperation::SqDiff); } 
-TEST_CASE("RefSquaredDifferenceEndToEndTestUint8") +TEST_CASE("ClSquaredDifferenceEndToEndTestUint8") { ElementwiseBinarySimpleEndToEnd(clDefaultBackends, BinaryOperation::SqDiff); } diff --git a/src/backends/neon/CMakeLists.txt b/src/backends/neon/CMakeLists.txt index 16164de3fb..5934221ec1 100644 --- a/src/backends/neon/CMakeLists.txt +++ b/src/backends/neon/CMakeLists.txt @@ -16,6 +16,7 @@ if(ARMCOMPUTENEON) NeonLayerSupport.hpp NeonRegistryInitializer.cpp NeonTensorHandle.hpp + NeonTensorHandle.cpp NeonTensorHandleFactory.cpp NeonTensorHandleFactory.hpp NeonTimer.hpp diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index cea2aa3eba..098b1ff109 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -505,9 +505,39 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph untouched.erase(baseLayer->GetGuid()); } } + + // Remove Reshape where possible + if (base.GetType() == LayerType::Reshape) + { + ReshapeLayer* baseLayer = PolymorphicDowncast(&base); + Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); + + // Cannot currently remove the Reshape if it's connected to any layer that has an NCHW layout + if (IsNCHW(parentLayer)) + { + continue; + } + bool isNCHW = false; + + for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i) + { + Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer(); + + if (IsNCHW(nextLayer)) + { + isNCHW = true; + break; + } + } + if (isNCHW) + { + continue; + } + RemoveReshapeLayer(baseLayer, untouched, optimizationViews); + } } - if (optimizationViews.GetSubstitutions().empty()) + if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty()) { optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); } diff --git a/src/backends/neon/NeonTensorHandle.cpp b/src/backends/neon/NeonTensorHandle.cpp new file mode 100644 index 0000000000..819805aa59 --- /dev/null +++ b/src/backends/neon/NeonTensorHandle.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonTensorHandle.hpp" + +#include + +namespace armnn +{ +std::shared_ptr NeonTensorHandle::DecorateTensorHandle(const TensorInfo &tensorInfo) +{ + auto* parent = const_cast(this); + auto decorated = std::make_shared(parent, tensorInfo); + m_Decorated.emplace_back(decorated); + return decorated; +} + +NeonTensorDecorator::NeonTensorDecorator() + : m_Original(nullptr), m_TensorInfo() +{ +} + +NeonTensorDecorator::NeonTensorDecorator(arm_compute::ITensor *parent, const TensorInfo& tensorInfo) + : m_Original(nullptr), m_TensorInfo() +{ + m_TensorInfo = armcomputetensorutils::BuildArmComputeTensorInfo(tensorInfo); + m_Original = parent; +} + +arm_compute::ITensorInfo *NeonTensorDecorator::info() const +{ + return &m_TensorInfo; +} + +arm_compute::ITensorInfo *NeonTensorDecorator::info() +{ + return &m_TensorInfo; +} + +uint8_t *NeonTensorDecorator::buffer() const +{ + return m_Original->buffer(); +} + +} \ No newline at end of file diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp index fcae77cdaa..e5f210773d 100644 --- a/src/backends/neon/NeonTensorHandle.hpp +++ b/src/backends/neon/NeonTensorHandle.hpp @@ -1,7 +1,8 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
diff --git a/src/backends/neon/NeonTensorHandle.cpp b/src/backends/neon/NeonTensorHandle.cpp
new file mode 100644
index 0000000000..819805aa59
--- /dev/null
+++ b/src/backends/neon/NeonTensorHandle.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonTensorHandle.hpp"
+
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+namespace armnn
+{
+std::shared_ptr<ITensorHandle> NeonTensorHandle::DecorateTensorHandle(const TensorInfo& tensorInfo)
+{
+    auto* parent = const_cast<NeonTensorHandle*>(this);
+    auto decorated = std::make_shared<NeonTensorHandleDecorator>(parent, tensorInfo);
+    m_Decorated.emplace_back(decorated);
+    return decorated;
+}
+
+NeonTensorDecorator::NeonTensorDecorator()
+    : m_Original(nullptr), m_TensorInfo()
+{
+}
+
+NeonTensorDecorator::NeonTensorDecorator(arm_compute::ITensor* parent, const TensorInfo& tensorInfo)
+    : m_Original(nullptr), m_TensorInfo()
+{
+    m_TensorInfo = armcomputetensorutils::BuildArmComputeTensorInfo(tensorInfo);
+    m_Original = parent;
+}
+
+arm_compute::ITensorInfo* NeonTensorDecorator::info() const
+{
+    return &m_TensorInfo;
+}
+
+arm_compute::ITensorInfo* NeonTensorDecorator::info()
+{
+    return &m_TensorInfo;
+}
+
+uint8_t* NeonTensorDecorator::buffer() const
+{
+    return m_Original->buffer();
+}
+
+}
\ No newline at end of file
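Note (not part of the patch): NeonTensorDecorator is a zero-copy wrapper; it presents
its own arm_compute::TensorInfo while forwarding buffer() to the wrapped tensor. A
minimal sketch of that contract, with hypothetical names:

    #include <cassert>

    // 'parent' is an existing IAclTensorHandle*; 'viewInfo' describes the reshaped view.
    armnn::TensorInfo viewInfo({2, 4}, armnn::DataType::Float32);
    armnn::NeonTensorDecorator view(&parent->GetTensor(), viewInfo);

    // Same storage, different metadata:
    assert(view.buffer() == parent->GetTensor().buffer());
    assert(view.info() != parent->GetTensor().info());
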
diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp
index fcae77cdaa..e5f210773d 100644
--- a/src/backends/neon/NeonTensorHandle.hpp
+++ b/src/backends/neon/NeonTensorHandle.hpp
@@ -1,7 +1,8 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+
 #pragma once
 
 #include
@@ -19,9 +20,11 @@
 #include
 #include
 #include
+#include "armnn/TypesUtils.hpp"
 
 namespace armnn
 {
+class NeonTensorHandleDecorator;
 
 class NeonTensorHandle : public IAclTensorHandle
 {
@@ -125,7 +128,7 @@ public:
 
     virtual bool Import(void* memory, MemorySource source) override
     {
-        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
+        if (m_ImportFlags& static_cast<MemorySourceFlags>(source))
         {
             if (source == MemorySource::Malloc && m_IsImportEnabled)
             {
@@ -181,6 +184,8 @@ public:
         return false;
     }
 
+    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo& tensorInfo) override;
+
 private:
     // Only used for testing
    void CopyOutTo(void* memory) const override
@@ -275,6 +280,7 @@ private:
     bool m_Imported;
     bool m_IsImportEnabled;
     const uintptr_t m_TypeAlignment;
+    std::vector<std::shared_ptr<NeonTensorHandleDecorator>> m_Decorated;
 };
 
 class NeonSubTensorHandle : public IAclTensorHandle
@@ -283,7 +289,7 @@ public:
     NeonSubTensorHandle(IAclTensorHandle* parent,
                         const arm_compute::TensorShape& shape,
                         const arm_compute::Coordinates& coords)
-     : m_Tensor(&parent->GetTensor(), shape, coords)
+     : m_Tensor(&parent->GetTensor(), shape, coords, true)
     {
         parentHandle = parent;
     }
@@ -319,6 +325,11 @@ public:
         return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
     }
 
+    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo&) override
+    {
+        return nullptr;
+    };
+
 private:
     // Only used for testing
     void CopyOutTo(void* memory) const override
@@ -394,4 +405,155 @@ private:
     ITensorHandle* parentHandle = nullptr;
 };
 
+/// NeonTensorDecorator wraps an existing Neon tensor allowing us to override the TensorInfo for it
+class NeonTensorDecorator : public arm_compute::ITensor
+{
+public:
+    NeonTensorDecorator();
+
+    NeonTensorDecorator(arm_compute::ITensor* original, const TensorInfo& info);
+
+    ~NeonTensorDecorator() = default;
+
+    NeonTensorDecorator(const NeonTensorDecorator&) = delete;
+
+    NeonTensorDecorator& operator=(const NeonTensorDecorator&) = delete;
+
+    NeonTensorDecorator(NeonTensorDecorator&&) = default;
+
+    NeonTensorDecorator& operator=(NeonTensorDecorator&&) = default;
+
+    // Inherited methods overridden:
+    arm_compute::ITensorInfo* info() const override;
+
+    arm_compute::ITensorInfo* info() override;
+
+    uint8_t* buffer() const override;
+
+private:
+    arm_compute::ITensor* m_Original;
+    mutable arm_compute::TensorInfo m_TensorInfo;
+};
+
+class NeonTensorHandleDecorator : public IAclTensorHandle
+{
+public:
+    NeonTensorHandleDecorator(IAclTensorHandle* parent, const TensorInfo& info)
+    : m_Tensor(&parent->GetTensor(), info)
+    {
+        parentHandle = parent;
+    }
+
+    arm_compute::ITensor& GetTensor() override { return m_Tensor; }
+    arm_compute::ITensor const& GetTensor() const override { return m_Tensor; }
+
+    virtual void Allocate() override {}
+    virtual void Manage() override {}
+
+    virtual ITensorHandle* GetParent() const override { return nullptr; }
+
+    virtual arm_compute::DataType GetDataType() const override
+    {
+        return m_Tensor.info()->data_type();
+    }
+
+    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {}
+
+    virtual const void* Map(bool /* blocking = true */) const override
+    {
+        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
+    }
+
+    virtual void Unmap() const override {}
+
+    TensorShape GetStrides() const override
+    {
+        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
+    }
+
+    TensorShape GetShape() const override
+    {
+        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
+    }
+
+    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo&) override
+    {
+        return nullptr;
+    };
+
+private:
+    // Only used for testing
+    void CopyOutTo(void* memory) const override
+    {
+        switch (this->GetDataType())
+        {
+            case arm_compute::DataType::F32:
+                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+                                                                 static_cast<float*>(memory));
+                break;
+            case arm_compute::DataType::U8:
+            case arm_compute::DataType::QASYMM8:
+                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+                                                                 static_cast<uint8_t*>(memory));
+                break;
+            case arm_compute::DataType::QSYMM8:
+            case arm_compute::DataType::QASYMM8_SIGNED:
+                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+                                                                 static_cast<int8_t*>(memory));
+                break;
+            case arm_compute::DataType::S16:
+            case arm_compute::DataType::QSYMM16:
+                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+                                                                 static_cast<int16_t*>(memory));
+                break;
+            case arm_compute::DataType::S32:
+                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+                                                                 static_cast<int32_t*>(memory));
+                break;
+            default:
+            {
+                throw armnn::UnimplementedException();
+            }
+        }
+    }
+
+    // Only used for testing
+    void CopyInFrom(const void* memory) override
+    {
+        switch (this->GetDataType())
+        {
+            case arm_compute::DataType::F32:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
+                                                                 this->GetTensor());
+                break;
+            case arm_compute::DataType::U8:
+            case arm_compute::DataType::QASYMM8:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
+                                                                 this->GetTensor());
+                break;
+            case arm_compute::DataType::QSYMM8:
+            case arm_compute::DataType::QASYMM8_SIGNED:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
+                                                                 this->GetTensor());
+                break;
+            case arm_compute::DataType::S16:
+            case arm_compute::DataType::QSYMM16:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
+                                                                 this->GetTensor());
+                break;
+            case arm_compute::DataType::S32:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
+                                                                 this->GetTensor());
+                break;
+            default:
+            {
+                throw armnn::UnimplementedException();
+            }
+        }
+    }
+
+    NeonTensorDecorator m_Tensor;
+    ITensorHandle* parentHandle = nullptr;
+};
+
+
 } // namespace armnn
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index e2439eecb7..4150845f58 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -19,6 +19,7 @@ BACKEND_SOURCES := \
         NeonInterceptorScheduler.cpp \
         NeonLayerSupport.cpp \
         NeonRegistryInitializer.cpp \
+        NeonTensorHandle.cpp \
         NeonTensorHandleFactory.cpp \
         NeonTimer.cpp \
         NeonWorkloadFactory.cpp \
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
index 071ee415de..5672f8b993 100644
--- a/src/backends/neon/test/NeonEndToEndTests.cpp
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <backendsCommon/test/SubgraphUtilsTest.hpp>
 
 #include
 #include
@@ -147,18 +148,18 @@ TEST_CASE("NeonAdditionEndToEndUint8Test")
 }
 
 // Power
-TEST_CASE("RefPowerEndToEndTestFloat32")
+TEST_CASE("NeonPowerEndToEndTestFloat32")
 {
     ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, BinaryOperation::Power);
 }
 
 // SqDiff
-TEST_CASE("RefSquaredDifferenceEndToEndTestFloat32")
+TEST_CASE("NeonSquaredDifferenceEndToEndTestFloat32")
 {
     ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, BinaryOperation::SqDiff);
 }
 
-TEST_CASE("RefSquaredDifferenceEndToEndTestUint8")
+TEST_CASE("NeonSquaredDifferenceEndToEndTestUint8")
 {
     ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(neonDefaultBackends, BinaryOperation::SqDiff);
 }
 
@@ -850,4 +851,19 @@ TEST_CASE("NeonQLstmEndToEndTest")
     QLstmEndToEnd(neonDefaultBackends);
 }
 
+TEST_CASE("NeonReshapeRemovalSimpleCaseEndToEnd")
+{
+    ReshapeRemovalEndToEnd<armnn::DataType::Float32>(neonDefaultBackends);
+}
+
+TEST_CASE("NeonReshapeRemovalNCHWFirstEndToEnd")
+{
+    ReshapeRemovalNCHWEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, false, true);
+}
+
+TEST_CASE("NeonReshapeRemovalNCHWSecondEndToEnd")
+{
+    ReshapeRemovalNCHWEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, false, false);
+}
+
 }
diff --git a/src/backends/reference/RefBackend.cpp b/src/backends/reference/RefBackend.cpp
index 8c8879c8be..02749af1f9 100644
--- a/src/backends/reference/RefBackend.cpp
+++ b/src/backends/reference/RefBackend.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -16,8 +16,6 @@
 #include
 #include
 
-#include
-
 namespace armnn
 {
@@ -116,9 +114,16 @@ OptimizationViews RefBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                 }
             }
         }
+
+        // Remove Reshape where possible
+        if (base.GetType() == LayerType::Reshape)
+        {
+            ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);
+            RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
+        }
     }
 
-    if (optimizationViews.GetSubstitutions().empty())
+    if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
    {
         optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
     }
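Both NeonBackend and RefBackend now apply the same reporting rule: a subgraph only
counts as untouched if the pass neither substituted nor deleted anything. A condensed
sketch of that rule (not part of the patch, names as in the diff):

    void ReportUntouchedIfUnchanged(armnn::OptimizationViews& views,
                                    const armnn::SubgraphView& subgraph)
    {
        // Without the GetDeletedSubgraphs() check, a pass that only deleted
        // Reshapes would wrongly report the whole subgraph as untouched.
        if (views.GetSubstitutions().empty() && views.GetDeletedSubgraphs().empty())
        {
            views.AddUntouchedSubgraph(armnn::SubgraphView(subgraph));
        }
    }
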
diff --git a/src/backends/reference/RefTensorHandle.cpp b/src/backends/reference/RefTensorHandle.cpp
index dbfa374945..cce992c947 100644
--- a/src/backends/reference/RefTensorHandle.cpp
+++ b/src/backends/reference/RefTensorHandle.cpp
@@ -1,29 +1,40 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2019-2023 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+
 #include "RefTensorHandle.hpp"
 
 namespace armnn
 {
 
-RefTensorHandle::RefTensorHandle(const TensorInfo &tensorInfo, std::shared_ptr<RefMemoryManager> &memoryManager):
+RefTensorHandle::RefTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<RefMemoryManager>& memoryManager):
     m_TensorInfo(tensorInfo),
     m_MemoryManager(memoryManager),
     m_Pool(nullptr),
     m_UnmanagedMemory(nullptr),
-    m_ImportedMemory(nullptr)
+    m_ImportedMemory(nullptr),
+    m_Decorated()
 {
-
 }
 
 RefTensorHandle::RefTensorHandle(const TensorInfo& tensorInfo)
     : m_TensorInfo(tensorInfo),
       m_Pool(nullptr),
       m_UnmanagedMemory(nullptr),
-      m_ImportedMemory(nullptr)
+      m_ImportedMemory(nullptr),
+      m_Decorated()
 {
+}
 
+RefTensorHandle::RefTensorHandle(const TensorInfo& tensorInfo, const RefTensorHandle& parent)
+    : m_TensorInfo(tensorInfo),
+      m_MemoryManager(parent.m_MemoryManager),
+      m_Pool(parent.m_Pool),
+      m_UnmanagedMemory(parent.m_UnmanagedMemory),
+      m_ImportedMemory(parent.m_ImportedMemory),
+      m_Decorated()
+{
 }
 
 RefTensorHandle::~RefTensorHandle()
@@ -139,4 +150,52 @@ bool RefTensorHandle::CanBeImported(void *memory, MemorySource source)
     return false;
 }
 
+std::shared_ptr<ITensorHandle> RefTensorHandle::DecorateTensorHandle(const TensorInfo& tensorInfo)
+{
+    auto decorated = std::make_shared<RefTensorHandleDecorator>(tensorInfo, *this);
+    m_Decorated.emplace_back(decorated);
+    return decorated;
+}
+
+RefTensorHandleDecorator::RefTensorHandleDecorator(const TensorInfo& tensorInfo, const RefTensorHandle& parent)
+: RefTensorHandle(tensorInfo)
+, m_TensorInfo(tensorInfo)
+, m_Parent(parent)
+{
+}
+
+void RefTensorHandleDecorator::Manage()
+{
+}
+
+void RefTensorHandleDecorator::Allocate()
+{
+}
+
+const void* RefTensorHandleDecorator::Map(bool unused) const
+{
+    return m_Parent.Map(unused);
+}
+
+MemorySourceFlags RefTensorHandleDecorator::GetImportFlags() const
+{
+    return static_cast<MemorySourceFlags>(MemorySource::Malloc);
+}
+
+bool RefTensorHandleDecorator::Import(void*, MemorySource)
+{
+    return false;
+}
+
+bool RefTensorHandleDecorator::CanBeImported(void*, MemorySource)
+{
+    return false;
+}
+
+std::shared_ptr<ITensorHandle> RefTensorHandleDecorator::DecorateTensorHandle(const TensorInfo&)
+{
+    return nullptr;
+}
+
+
 }
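The decorator constructor above copies the parent's pool and memory pointers, and its
Map() forwards to the parent, so a decorated Ref handle reads the parent's bytes under a
different TensorInfo. A minimal sketch (not part of the patch, names hypothetical):

    // View four floats as a 2x2 matrix without copying.
    armnn::RefTensorHandle parent(armnn::TensorInfo({4}, armnn::DataType::Float32));
    parent.Allocate();
    auto view = parent.DecorateTensorHandle(armnn::TensorInfo({2, 2}, armnn::DataType::Float32));
    const auto* data = static_cast<const float*>(view->Map());   // same storage as parent.Map()
    view->Unmap();
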
diff --git a/src/backends/reference/RefTensorHandle.hpp b/src/backends/reference/RefTensorHandle.hpp
index b4dedd5e77..128f623cd3 100644
--- a/src/backends/reference/RefTensorHandle.hpp
+++ b/src/backends/reference/RefTensorHandle.hpp
@@ -1,7 +1,8 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2019-2023 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+
 #pragma once
 
 #include
@@ -11,14 +12,17 @@
 
 namespace armnn
 {
+class RefTensorHandleDecorator;
 
 // An implementation of ITensorHandle with simple "bump the pointer" memory-management behaviour
 class RefTensorHandle : public ITensorHandle
 {
 public:
-    RefTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<RefMemoryManager> &memoryManager);
+    RefTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<RefMemoryManager>& memoryManager);
 
     RefTensorHandle(const TensorInfo& tensorInfo);
 
+    RefTensorHandle(const TensorInfo& tensorInfo, const RefTensorHandle& parent);
+
     ~RefTensorHandle();
 
     virtual void Manage() override;
@@ -56,6 +60,8 @@ public:
     virtual bool Import(void* memory, MemorySource source) override;
     virtual bool CanBeImported(void* memory, MemorySource source) override;
 
+    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo& tensorInfo) override;
+
 private:
     // Only used for testing
     void CopyOutTo(void*) const override;
@@ -68,10 +74,86 @@ private:
 
     TensorInfo m_TensorInfo;
 
-    std::shared_ptr<RefMemoryManager> m_MemoryManager;
+    mutable std::shared_ptr<RefMemoryManager> m_MemoryManager;
     RefMemoryManager::Pool* m_Pool;
     mutable void* m_UnmanagedMemory;
     void* m_ImportedMemory;
+    std::vector<std::shared_ptr<RefTensorHandleDecorator>> m_Decorated;
+};
+
+class RefTensorHandleDecorator : public RefTensorHandle
+{
+public:
+    RefTensorHandleDecorator(const TensorInfo& tensorInfo, const RefTensorHandle& parent);
+
+    ~RefTensorHandleDecorator() = default;
+
+    virtual void Manage() override;
+
+    virtual void Allocate() override;
+
+    virtual ITensorHandle* GetParent() const override
+    {
+        return nullptr;
+    }
+
+    virtual const void* Map(bool /* blocking = true */) const override;
+    using ITensorHandle::Map;
+
+    virtual void Unmap() const override
+    {}
+
+    TensorShape GetStrides() const override
+    {
+        return GetUnpaddedTensorStrides(m_TensorInfo);
+    }
+
+    TensorShape GetShape() const override
+    {
+        return m_TensorInfo.GetShape();
+    }
+
+    const TensorInfo& GetTensorInfo() const
+    {
+        return m_TensorInfo;
+    }
+
+    virtual MemorySourceFlags GetImportFlags() const override;
+
+    virtual bool Import(void* memory, MemorySource source) override;
+    virtual bool CanBeImported(void* memory, MemorySource source) override;
+
+    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo& tensorInfo) override;
+
+    /// Map the tensor data for access. Must be paired with call to Unmap().
+    /// \param blocking hint to block the calling thread until all other accesses are complete. (backend dependent)
+    /// \return pointer to the first element of the mapped data.
+    void* Map(bool blocking=true)
+    {
+        return const_cast<void*>(static_cast<const ITensorHandle*>(this)->Map(blocking));
+    }
+
+    /// Unmap the tensor data that was previously mapped with call to Map().
+    void Unmap()
+    {
+        return static_cast<const ITensorHandle*>(this)->Unmap();
+    }
+
+    /// Testing support to be able to verify and set tensor data content
+    void CopyOutTo(void* /* memory */) const override
+    {};
+
+    void CopyInFrom(const void* /* memory */) override
+    {};
+
+    /// Unimport externally allocated memory
+    void Unimport() override
+    {};
+
+private:
+    TensorInfo m_TensorInfo;
+    const RefTensorHandle& m_Parent;
+};
 
 }
+
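The Map/Unmap pair at the end of the decorator follows a common const-correctness
idiom: the const overload does the real work and the non-const overload forwards to it.
In generic form (illustrative only):

    struct Example
    {
        const void* Map(bool blocking = true) const;   // the real implementation
        void* Map(bool blocking = true)                // non-const forwards to const
        {
            return const_cast<void*>(static_cast<const Example*>(this)->Map(blocking));
        }
    };
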
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index 894dd75ef2..13ac7fc233 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -1314,4 +1314,52 @@ TEST_CASE("ReplaceFunctionsfromUint8toFloat16ActivationWorkload")
     RefCreateActivationWorkloadReplaceFunctionsTest();
 }
 
+bool TestRefTensorHandleInfo(armnn::RefTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
+{
+    const TensorInfo handleInfo = handle->GetTensorInfo();
+    const TensorInfo expectedAclInfo = expectedInfo;
+
+    if (handleInfo.GetDataType() != expectedAclInfo.GetDataType())
+    {
+        return false;
+    }
+
+    if (handleInfo.GetNumDimensions() != expectedAclInfo.GetNumDimensions())
+    {
+        return false;
+    }
+
+    for (unsigned int d = 0; d < expectedAclInfo.GetNumDimensions(); ++d)
+    {
+        if (handleInfo.GetShape()[d] != expectedAclInfo.GetShape()[d])
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+TEST_CASE("RefCreateSplitterWorkload")
+{
+    Graph graph;
+    RefWorkloadFactory factory = GetFactory();
+
+    auto workload = CreateSplitterWorkloadTest<RefSplitterWorkload, DataType::Float32>(factory, graph);
+
+    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
+    SplitterQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    CHECK(TestRefTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
+
+    auto outputHandle0 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    CHECK(TestRefTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
+
+    auto outputHandle1 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    CHECK(TestRefTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
+
+    auto outputHandle2 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    CHECK(TestRefTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
+}
+
 }
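A usage sketch of the new TestRefTensorHandleInfo helper beyond the splitter test,
assuming a hypothetical decorated handle (not part of the patch):

    // A decorated view should report the overridden TensorInfo, not the parent's.
    armnn::RefTensorHandle parent(armnn::TensorInfo({1, 4}, armnn::DataType::Float32));
    auto view = parent.DecorateTensorHandle(armnn::TensorInfo({2, 2}, armnn::DataType::Float32));
    auto* viewHandle = armnn::PolymorphicDowncast<armnn::RefTensorHandle*>(view.get());
    CHECK(TestRefTensorHandleInfo(viewHandle, armnn::TensorInfo({2, 2}, armnn::DataType::Float32)));
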
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 4bb3f2947a..eb2aabcd1e 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <backendsCommon/test/SubgraphUtilsTest.hpp>
 
 #include
 #include
@@ -1618,6 +1619,22 @@ TEST_CASE("RefSquaredDifferenceEndToEndTestUint8")
 {
     ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::SqDiff);
 }
+
 #endif
 
+// Backend Optimization Tests
+TEST_CASE("RefReshapeRemovalSimpleCaseEndToEnd")
+{
+    ReshapeRemovalEndToEnd<armnn::DataType::Float32>(defaultBackends);
+}
+
+TEST_CASE("RefReshapeRemovalNCHWFirstEndToEnd")
+{
+    ReshapeRemovalNCHWEndToEnd<armnn::DataType::Float32>(defaultBackends, true, true);
+}
+
+TEST_CASE("RefReshapeRemovalNCHWSecondEndToEnd")
+{
+    ReshapeRemovalNCHWEndToEnd<armnn::DataType::Float32>(defaultBackends, true, false);
+}
 }
diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp
index 4881dff7fb..8ce3689e9f 100644
--- a/tests/ExecuteNetwork/ArmNNExecutor.cpp
+++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp
@@ -499,7 +499,7 @@ ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
         case armnn::LayerType::Output:
         {
             m_IOInfo.m_OutputNames.emplace_back(name);
-            m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
+            m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetTensorInfo()};
             break;
         }
         default: {}
@@ -689,6 +689,7 @@ void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors
             break;
         }
         case armnn::DataType::QAsymmU8:
+        case armnn::DataType::Boolean:
         {
             PrintTensor<uint8_t>(outputWriteInfo, "%d ");
             break;
@@ -696,7 +697,6 @@ void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors
         case armnn::DataType::Float16:
         case armnn::DataType::QSymmS16:
         case armnn::DataType::BFloat16:
-        case armnn::DataType::Boolean:
         case armnn::DataType::Signed64:
         default:
         {
-- 
cgit v1.2.1