diff options
author | Mike Kelly <mike.kelly@arm.com> | 2023-07-07 15:43:06 +0100 |
---|---|---|
committer | Mike Kelly <mike.kelly@arm.com> | 2023-07-14 00:00:53 +0100 |
commit | 4cc341cf8b5a6e6bb0543504cbbfde6fa11a2cdb (patch) | |
tree | 7cac128e9ec6f2fd27f1afdb55f44b870f39e0b3 /src/backends/neon | |
parent | 6963b33221c23af4a8eff19ff4a5773230b0befd (diff) | |
download | armnn-4cc341cf8b5a6e6bb0543504cbbfde6fa11a2cdb.tar.gz |
IVGCVSW-7830 Add backend optimizations to remove Reshapes where possible
* Added optimization to remove reshapes for Neon and Ref Backends
by using overridden TensorInfos
* Added ability to delete Subgraphs during Optimization
* Fixed naming error in NeonEndToEndTests and CLEndToEndTests
* Added LayerNameAndTypeCheck for testing.
* Fixed error where layers were not marked as altered when removed in
CLBackend
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I1ac25cd4ec9821470d961831ae2c8d24882276cc
Diffstat (limited to 'src/backends/neon')
-rw-r--r-- | src/backends/neon/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/backends/neon/NeonBackend.cpp | 32 | ||||
-rw-r--r-- | src/backends/neon/NeonTensorHandle.cpp | 47 | ||||
-rw-r--r-- | src/backends/neon/NeonTensorHandle.hpp | 168 | ||||
-rw-r--r-- | src/backends/neon/backend.mk | 1 | ||||
-rw-r--r-- | src/backends/neon/test/NeonEndToEndTests.cpp | 22 |
6 files changed, 264 insertions, 7 deletions
diff --git a/src/backends/neon/CMakeLists.txt b/src/backends/neon/CMakeLists.txt index 16164de3fb..5934221ec1 100644 --- a/src/backends/neon/CMakeLists.txt +++ b/src/backends/neon/CMakeLists.txt @@ -16,6 +16,7 @@ if(ARMCOMPUTENEON) NeonLayerSupport.hpp NeonRegistryInitializer.cpp NeonTensorHandle.hpp + NeonTensorHandle.cpp NeonTensorHandleFactory.cpp NeonTensorHandleFactory.hpp NeonTimer.hpp diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index cea2aa3eba..098b1ff109 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -505,9 +505,39 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph untouched.erase(baseLayer->GetGuid()); } } + + // Remove Reshape where possible + if (base.GetType() == LayerType::Reshape) + { + ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base); + Layer& parentLayer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); + + // Cannot currently remove the Reshape if it's connected to any layer that has an NCHW layout + if (IsNCHW(parentLayer)) + { + continue; + } + bool isNCHW = false; + + for (unsigned int i = 0; i < baseLayer->GetOutputSlot(0).GetNumConnections(); ++i) + { + Layer& nextLayer = baseLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer(); + + if (IsNCHW(nextLayer)) + { + isNCHW = true; + break; + } + } + if (isNCHW) + { + continue; + } + RemoveReshapeLayer(baseLayer, untouched, optimizationViews); + } } - if (optimizationViews.GetSubstitutions().empty()) + if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty()) { optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); } diff --git a/src/backends/neon/NeonTensorHandle.cpp b/src/backends/neon/NeonTensorHandle.cpp new file mode 100644 index 0000000000..819805aa59 --- /dev/null +++ b/src/backends/neon/NeonTensorHandle.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonTensorHandle.hpp" + +#include <aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ +std::shared_ptr<ITensorHandle> NeonTensorHandle::DecorateTensorHandle(const TensorInfo &tensorInfo) +{ + auto* parent = const_cast<NeonTensorHandle*>(this); + auto decorated = std::make_shared<NeonTensorHandleDecorator>(parent, tensorInfo); + m_Decorated.emplace_back(decorated); + return decorated; +} + +NeonTensorDecorator::NeonTensorDecorator() + : m_Original(nullptr), m_TensorInfo() +{ +} + +NeonTensorDecorator::NeonTensorDecorator(arm_compute::ITensor *parent, const TensorInfo& tensorInfo) + : m_Original(nullptr), m_TensorInfo() +{ + m_TensorInfo = armcomputetensorutils::BuildArmComputeTensorInfo(tensorInfo); + m_Original = parent; +} + +arm_compute::ITensorInfo *NeonTensorDecorator::info() const +{ + return &m_TensorInfo; +} + +arm_compute::ITensorInfo *NeonTensorDecorator::info() +{ + return &m_TensorInfo; +} + +uint8_t *NeonTensorDecorator::buffer() const +{ + return m_Original->buffer(); +} + +}
\ No newline at end of file diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp index fcae77cdaa..e5f210773d 100644 --- a/src/backends/neon/NeonTensorHandle.hpp +++ b/src/backends/neon/NeonTensorHandle.hpp @@ -1,7 +1,8 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // + #pragma once #include <BFloat16.hpp> @@ -19,9 +20,11 @@ #include <arm_compute/runtime/SubTensor.h> #include <arm_compute/core/TensorShape.h> #include <arm_compute/core/Coordinates.h> +#include "armnn/TypesUtils.hpp" namespace armnn { +class NeonTensorHandleDecorator; class NeonTensorHandle : public IAclTensorHandle { @@ -125,7 +128,7 @@ public: virtual bool Import(void* memory, MemorySource source) override { - if (m_ImportFlags & static_cast<MemorySourceFlags>(source)) + if (m_ImportFlags& static_cast<MemorySourceFlags>(source)) { if (source == MemorySource::Malloc && m_IsImportEnabled) { @@ -181,6 +184,8 @@ public: return false; } + virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo& tensorInfo) override; + private: // Only used for testing void CopyOutTo(void* memory) const override @@ -275,6 +280,7 @@ private: bool m_Imported; bool m_IsImportEnabled; const uintptr_t m_TypeAlignment; + std::vector<std::shared_ptr<NeonTensorHandleDecorator>> m_Decorated; }; class NeonSubTensorHandle : public IAclTensorHandle @@ -283,7 +289,7 @@ public: NeonSubTensorHandle(IAclTensorHandle* parent, const arm_compute::TensorShape& shape, const arm_compute::Coordinates& coords) - : m_Tensor(&parent->GetTensor(), shape, coords) + : m_Tensor(&parent->GetTensor(), shape, coords, true) { parentHandle = parent; } @@ -319,6 +325,11 @@ public: return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); } + virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo&) override + { + return nullptr; + }; + private: // Only used for testing void CopyOutTo(void* memory) const override @@ -394,4 +405,155 @@ private: ITensorHandle* parentHandle = nullptr; }; +/// NeonTensorDecorator wraps an existing Neon tensor allowing us to override the TensorInfo for it +class NeonTensorDecorator : public arm_compute::ITensor +{ +public: + NeonTensorDecorator(); + + NeonTensorDecorator(arm_compute::ITensor* original, const TensorInfo& info); + + ~NeonTensorDecorator() = default; + + NeonTensorDecorator(const NeonTensorDecorator&) = delete; + + NeonTensorDecorator& operator=(const NeonTensorDecorator&) = delete; + + NeonTensorDecorator(NeonTensorDecorator&&) = default; + + NeonTensorDecorator& operator=(NeonTensorDecorator&&) = default; + + // Inherited methods overridden: + arm_compute::ITensorInfo* info() const override; + + arm_compute::ITensorInfo* info() override; + + uint8_t* buffer() const override; + +private: + arm_compute::ITensor* m_Original; + mutable arm_compute::TensorInfo m_TensorInfo; +}; + +class NeonTensorHandleDecorator : public IAclTensorHandle +{ +public: + NeonTensorHandleDecorator(IAclTensorHandle* parent, const TensorInfo& info) + : m_Tensor(&parent->GetTensor(), info) + { + parentHandle = parent; + } + + arm_compute::ITensor& GetTensor() override { return m_Tensor; } + arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {} + + virtual const void* Map(bool /* blocking = true */) const override + { + return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + + virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo&) override + { + return nullptr; + }; + +private: + // Only used for testing + void CopyOutTo(void* memory) const override + { + switch (this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<float*>(memory)); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<uint8_t*>(memory)); + break; + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int8_t*>(memory)); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int16_t*>(memory)); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int32_t*>(memory)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + } + + // Only used for testing + void CopyInFrom(const void* memory) override + { + switch (this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory), + this->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + } + + NeonTensorDecorator m_Tensor; + ITensorHandle* parentHandle = nullptr; +}; + + } // namespace armnn diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index e2439eecb7..4150845f58 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -19,6 +19,7 @@ BACKEND_SOURCES := \ NeonInterceptorScheduler.cpp \ NeonLayerSupport.cpp \ NeonRegistryInitializer.cpp \ + NeonTensorHandle.cpp \ NeonTensorHandleFactory.cpp \ NeonTimer.cpp \ NeonWorkloadFactory.cpp \ diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp index 071ee415de..5672f8b993 100644 --- a/src/backends/neon/test/NeonEndToEndTests.cpp +++ b/src/backends/neon/test/NeonEndToEndTests.cpp @@ -25,6 +25,7 @@ #include <backendsCommon/test/ReshapeEndToEndTestImpl.hpp> #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp> #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp> +#include <backendsCommon/test/SubgraphUtilsTest.hpp> #include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp> #include <backendsCommon/test/TransposeEndToEndTestImpl.hpp> @@ -147,18 +148,18 @@ TEST_CASE("NeonAdditionEndToEndUint8Test") } // Power -TEST_CASE("RefPowerEndToEndTestFloat32") +TEST_CASE("NeonPowerEndToEndTestFloat32") { ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, BinaryOperation::Power); } // SqDiff -TEST_CASE("RefSquaredDifferenceEndToEndTestFloat32") +TEST_CASE("NeonSquaredDifferenceEndToEndTestFloat32") { ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, BinaryOperation::SqDiff); } -TEST_CASE("RefSquaredDifferenceEndToEndTestUint8") +TEST_CASE("NeonSquaredDifferenceEndToEndTestUint8") { ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(neonDefaultBackends, BinaryOperation::SqDiff); } @@ -850,4 +851,19 @@ TEST_CASE("NeonQLstmEndToEndTest") QLstmEndToEnd(neonDefaultBackends); } +TEST_CASE("NeonReshapeRemovalSimpleCaseEndToEnd") +{ + ReshapeRemovalEndToEnd<armnn::DataType::Float32>(neonDefaultBackends); +} + +TEST_CASE("NeonReshapeRemovalNCHWFirstEndToEnd") +{ + ReshapeRemovalNCHWEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, false, true); +} + +TEST_CASE("NeonReshapeRemovalNCHWSecondEndToEnd") +{ + ReshapeRemovalNCHWEndToEnd<armnn::DataType::Float32>(neonDefaultBackends, false, false); +} + } |