From b1aad4270fa8ad5c4aa62e27d564baf723b2cee5 Mon Sep 17 00:00:00 2001
From: Finn Williams
Date: Thu, 28 Oct 2021 19:07:32 +0100
Subject: IVGCVSW-6527 Support the new memory API in loaded network

 * enable external memory management for neon and ref backends
 * change m_TensorMemoryVector to hold shared pointers
 * change input layer backend Id to match backend id of connected layer

Signed-off-by: Finn Williams
Change-Id: I2216a724028312eb101b290df3f224177826b1a0
---
 src/backends/backendsCommon/DefaultAllocator.hpp   |  4 +--
 src/backends/backendsCommon/MemoryManager.cpp      |  2 +-
 src/backends/backendsCommon/MemoryManager.hpp      | 11 +++---
 src/backends/backendsCommon/common.mk              |  7 ++--
 .../strategies/SingleAxisPriorityList.cpp          |  4 +--
 .../backendsCommon/test/CompatibilityTests.cpp     |  6 ++--
 .../backendsCommon/test/MemoryManagerTests.cpp     | 40 ++++++++++++----------
 .../backendsCommon/test/OptimizedNetworkTests.cpp  |  4 +--
 src/backends/cl/ClBackend.hpp                      |  2 +-
 src/backends/neon/NeonBackend.hpp                  |  4 +--
 src/backends/neon/NeonTensorHandle.hpp             | 12 ++++---
 src/backends/reference/RefBackend.hpp              |  2 +-
 src/backends/reference/RefTensorHandle.cpp         |  2 +-
 src/backends/reference/RefWorkloadFactory.cpp      | 22 +++++++++---
 14 files changed, 71 insertions(+), 51 deletions(-)

(limited to 'src/backends')

diff --git a/src/backends/backendsCommon/DefaultAllocator.hpp b/src/backends/backendsCommon/DefaultAllocator.hpp
index 2451db3ab8..cf0f1774f0 100644
--- a/src/backends/backendsCommon/DefaultAllocator.hpp
+++ b/src/backends/backendsCommon/DefaultAllocator.hpp
@@ -22,12 +22,12 @@ public:
     void* allocate(size_t size, size_t alignment = 0) override
     {
         IgnoreUnused(alignment);
-        return ::operator new(size);
+        return ::operator new(size_t(size));
     }
 
     void free(void* ptr) override
     {
-        std::free(ptr);
+        ::operator delete(ptr);
     }
 
     armnn::MemorySource GetMemorySourceType() override
diff --git a/src/backends/backendsCommon/MemoryManager.cpp b/src/backends/backendsCommon/MemoryManager.cpp
index 1c109c3c91..77cab27789 100644
--- a/src/backends/backendsCommon/MemoryManager.cpp
+++ b/src/backends/backendsCommon/MemoryManager.cpp
@@ -11,7 +11,7 @@ namespace armnn
 {
 
 void MemoryManager::StoreMemToAllocate(std::vector<BufferStorage> bufferStorageVector,
-                                       ICustomAllocator* customAllocator,
+                                       std::shared_ptr<ICustomAllocator> customAllocator,
                                        const size_t typeAlignment)
 {
     IgnoreUnused(typeAlignment);
diff --git a/src/backends/backendsCommon/MemoryManager.hpp b/src/backends/backendsCommon/MemoryManager.hpp
index cbd6fcf9bc..5113b231d3 100644
--- a/src/backends/backendsCommon/MemoryManager.hpp
+++ b/src/backends/backendsCommon/MemoryManager.hpp
@@ -2,6 +2,7 @@
 // Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+#pragma once
 
 #include <armnn/backends/ICustomAllocator.hpp>
 
@@ -10,7 +11,7 @@ namespace armnn
 struct Allocator
 {
     /// Pointer to @ICustomAllocator.
-    ICustomAllocator* m_CustomAllocator{};
+    std::shared_ptr<ICustomAllocator> m_CustomAllocator{};
     /// Value which the size of each buffer (actual data size + padding) has to be a multiple of.
     size_t m_Alignment = 0 ;
 };
@@ -19,16 +20,16 @@ struct TensorMemory
 {
     /// Number of bytes the value is away from the @BufferStorage.m_Buffer.
     size_t m_Offset{};
-    /// Pointer to the tensor value.
-    void* m_Data = nullptr;
     /// Identifier to be used by the @LoadedNetwork to order the tensors.
     unsigned int m_OutputSlotId{};
+    /// Pointer to the tensor value.
+    void* m_Data = nullptr;
 };
 
 struct BufferStorage
 {
     /// Vector of pointer to @TensorMemory.
-    std::vector<TensorMemory*> m_TensorMemoryVector;
+    std::vector<std::shared_ptr<TensorMemory>> m_TensorMemoryVector;
     /// Total size of the buffer.
     size_t m_BufferSize;
     /// Pointer to the first element of the buffer.
@@ -43,7 +44,7 @@ public:
     /// @param[in] customAllocator - Pointer to @ICustomAllocator.
     /// @param[in] typeAlignment - Optional parameter. Value of which the size of each value has to be multiple of.
     void StoreMemToAllocate(std::vector<BufferStorage> bufferStorageVector,
-                            ICustomAllocator* customAllocator,
+                            std::shared_ptr<ICustomAllocator> customAllocator,
                             size_t typeAlignment = 0);
 
     /// Allocate the amount of memory indicated by @m_BufferSize, and
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index a77ec06035..56c9d6545a 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -17,6 +17,7 @@ COMMON_SOURCES := \
     MapWorkload.cpp \
     MemCopyWorkload.cpp \
     MemImportWorkload.cpp \
+    MemoryManager.cpp \
     MemSyncWorkload.cpp \
     OptimizationViews.cpp \
     TensorHandleFactoryRegistry.cpp \
@@ -25,7 +26,8 @@ COMMON_SOURCES := \
     WorkloadFactory.cpp \
     WorkloadUtils.cpp \
     memoryOptimizerStrategyLibrary/strategies/ConstantMemoryStrategy.cpp \
-    memoryOptimizerStrategyLibrary/strategies/StrategyValidator.cpp \
+    memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp \
+    memoryOptimizerStrategyLibrary/strategies/StrategyValidator.cpp
 
 
 # COMMON_TEST_SOURCES contains the list of files to be included
@@ -104,7 +106,8 @@ COMMON_TEST_SOURCES := \
     test/layerTests/TransposeConvolution2dTestImpl.cpp \
     test/layerTests/UnidirectionalSequenceLstmTestImpl.cpp \
     memoryOptimizerStrategyLibrary/test/ConstMemoryStrategyTests.cpp \
-    memoryOptimizerStrategyLibrary/test/ValidatorStrategyTests.cpp
+    memoryOptimizerStrategyLibrary/test/ValidatorStrategyTests.cpp \
+    memoryOptimizerStrategyLibrary/test/SingleAxisPriorityListTests.cpp
 
 ifeq ($(ARMNN_REF_ENABLED),1)
 COMMON_TEST_SOURCES += \
diff --git a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp
index 3afa061681..738b7137a7 100644
--- a/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp
+++ b/src/backends/backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.cpp
@@ -155,9 +155,9 @@ void SingleAxisPriorityList::PlaceBlocks(const std::list<MemBlock*>& priorityLis
             // The indexes don't match we need at least two words
             // Zero the bits to the right of curBlock->m_EndOfLife
-            remainder = (curBlock->m_EndOfLife +1 - lastWordIndex * wordSize);
+            remainder = (curBlock->m_EndOfLife - lastWordIndex * wordSize);
 
-            size_t lastWord = (1u << remainder) - 1;
+            size_t lastWord = (1ul << remainder) - 1;
             lastWord = lastWord << (wordSize - remainder);
 
             if(firstWordIndex + 1 == lastWordIndex)
diff --git a/src/backends/backendsCommon/test/CompatibilityTests.cpp b/src/backends/backendsCommon/test/CompatibilityTests.cpp
index d18a8fbb6c..3685f75986 100644
--- a/src/backends/backendsCommon/test/CompatibilityTests.cpp
+++ b/src/backends/backendsCommon/test/CompatibilityTests.cpp
@@ -181,7 +181,7 @@ TEST_CASE ("Ref_Backends_Capability_Test")
                            {"ProtectedContentAllocation", false},
                            {"ConstantTensorsAsInputs", true},
                            {"PreImportIOTensors", true},
-                           {"ExternallyManagedMemory", false},
+                           {"ExternallyManagedMemory", true},
                            {"MultiAxisPacking", false}});
 }
 
@@ -200,7 +200,7 @@ TEST_CASE ("Neon_Backends_Capability_Test")
                            {"ProtectedContentAllocation", false},
                            {"ConstantTensorsAsInputs", false},
                            {"PreImportIOTensors", false},
-                           {"ExternallyManagedMemory", false},
{"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}}); } @@ -219,7 +219,7 @@ TEST_CASE ("Cl_Backends_Capability_Test") {"ProtectedContentAllocation", true}, {"ConstantTensorsAsInputs", false}, {"PreImportIOTensors", false}, - {"ExternallyManagedMemory", false}, + {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}}); } diff --git a/src/backends/backendsCommon/test/MemoryManagerTests.cpp b/src/backends/backendsCommon/test/MemoryManagerTests.cpp index c873499ef3..662a5c2423 100644 --- a/src/backends/backendsCommon/test/MemoryManagerTests.cpp +++ b/src/backends/backendsCommon/test/MemoryManagerTests.cpp @@ -59,17 +59,18 @@ TEST_CASE("MemoryManagerTest") // Create mock up bufferStorageVector with 2 BufferStorage with the same TensorMemory size_t numTensors = 5; - std::vector tensorMemoryPointerVector(numTensors); - std::vector tensorMemoryVector; + std::vector> tensorMemoryPointerVector(numTensors); + std::vector> tensorMemoryVector; tensorMemoryVector.reserve(numTensors); std::vector offsets(numTensors); std::iota(std::begin(offsets), std::end(offsets), 0); - for (uint32_t idx = 0; idx < tensorMemoryPointerVector.size(); ++idx) + for (uint idx = 0; idx < tensorMemoryPointerVector.size(); ++idx) { - tensorMemoryVector.emplace_back(TensorMemory{offsets[idx], nullptr, 0}); - tensorMemoryPointerVector[idx] = &tensorMemoryVector[idx]; + tensorMemoryVector.emplace_back(std::make_shared(TensorMemory{offsets[idx], 0, nullptr})); + + tensorMemoryPointerVector[idx] = tensorMemoryVector[idx]; } std::vector bufferStorageVector; @@ -77,30 +78,31 @@ TEST_CASE("MemoryManagerTest") bufferStorageVector.emplace_back(BufferStorage{tensorMemoryPointerVector, numTensors}); // Create an instance of the SampleCustomAllocator - SampleCustomAllocator customAllocator = SampleCustomAllocator(); - customAllocator.m_Values = {10, 11, 12, 13, 14}; + std::shared_ptr customAllocator = + std::make_unique(SampleCustomAllocator()); + + customAllocator->m_Values = {10, 11, 12, 13, 14}; // Check that the test was set up correctly - CHECK(customAllocator.m_Values.size() == numTensors); + CHECK(customAllocator->m_Values.size() == numTensors); + size_t bufferVecSize = bufferStorageVector.size(); // Utilise 3 functions in the MemoryManager. Check the counters and the pointer to the values are correct. 
     MemoryManager memoryManager;
-    memoryManager.StoreMemToAllocate(bufferStorageVector, &customAllocator);
+    memoryManager.StoreMemToAllocate(bufferStorageVector, customAllocator);
 
     memoryManager.Allocate();
-    CHECK(customAllocator.m_CounterAllocate == bufferStorageVector.size());
-    for (const auto& bufferStorage : bufferStorageVector)
+    CHECK(customAllocator->m_CounterAllocate == bufferVecSize);
+
+    uint idx = 0;
+    for (auto tensorMemory : tensorMemoryVector)
     {
-        uint32_t idx = 0;
-        for (auto tensorMemory : bufferStorage.m_TensorMemoryVector)
-        {
-            auto value = reinterpret_cast<uint8_t*>(tensorMemory->m_Data);
-            CHECK(customAllocator.m_Values[idx] == *value);
-            idx += 1;
-        }
+        auto value = reinterpret_cast<uint8_t*>(tensorMemory->m_Data);
+        CHECK(customAllocator->m_Values[idx] == *value);
+        idx += 1;
     }
 
     memoryManager.Deallocate();
-    CHECK(customAllocator.m_CounterFree == bufferStorageVector.size());
+    CHECK(customAllocator->m_CounterFree == bufferStorageVector.size());
 }
 }
diff --git a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp
index 012737e1d7..b0ee9bee32 100644
--- a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp
+++ b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp
@@ -138,7 +138,7 @@ TEST_CASE("OptimizeValidateDeviceNonSupportLayerWithFallback")
         // the other layers are supported by CpuRef.
         // If NEON is not enabled, all layers are supported by CpuRef.
 #if defined(ARMCOMPUTENEON_ENABLED)
-        if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+        if (layer->GetType() == armnn::LayerType::Output)
         {
             CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
         }
@@ -337,7 +337,7 @@ TEST_CASE("OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback")
         // the other layers are supported by CpuRef.
         // If neither NEON, nor CL is enabled, all layers are supported by CpuRef.
 #if defined(ARMCOMPUTENEON_ENABLED)
-        if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+        if (layer->GetType() == armnn::LayerType::Output)
         {
             CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
         }
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 7597d093be..99fe9069ff 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -29,7 +29,7 @@ const BackendCapabilities gpuAccCapabilities("GpuAcc",
                                              {"ProtectedContentAllocation", true},
                                              {"ConstantTensorsAsInputs", false},
                                              {"PreImportIOTensors", false},
-                                             {"ExternallyManagedMemory", false},
+                                             {"ExternallyManagedMemory", true},
                                              {"MultiAxisPacking", false},
                                              {"SingleAxisPacking", true}
                                             });
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index 68d60a4c04..e53bacb84a 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -10,14 +10,14 @@ namespace armnn
 {
 
 // add new capabilities here..
-const BackendCapabilities cpuAccCapabilities("GpuAcc",
+const BackendCapabilities cpuAccCapabilities("CpuAcc",
                                              {
                                                     {"NonConstWeights", false},
                                                     {"AsyncExecution", false},
                                                     {"ProtectedContentAllocation", false},
                                                     {"ConstantTensorsAsInputs", false},
                                                     {"PreImportIOTensors", false},
-                                                    {"ExternallyManagedMemory", false},
+                                                    {"ExternallyManagedMemory", true},
                                                     {"MultiAxisPacking", false},
                                                     {"SingleAxisPacking", true}
                                              });
diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp
index ae8aa5d8c7..dd4c2572f9 100644
--- a/src/backends/neon/NeonTensorHandle.hpp
+++ b/src/backends/neon/NeonTensorHandle.hpp
@@ -29,7 +29,8 @@ public:
     NeonTensorHandle(const TensorInfo& tensorInfo)
                      : m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)),
                        m_Imported(false),
-                       m_IsImportEnabled(false)
+                       m_IsImportEnabled(false),
+                       m_TypeAlignment(GetDataTypeSize(tensorInfo.GetDataType()))
     {
         armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
     }
@@ -39,7 +40,9 @@ public:
                      MemorySourceFlags importFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc))
                      : m_ImportFlags(importFlags),
                        m_Imported(false),
-                       m_IsImportEnabled(false)
+                       m_IsImportEnabled(false),
+                       m_TypeAlignment(GetDataTypeSize(tensorInfo.GetDataType()))
+
     {
         armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
 
@@ -117,9 +120,7 @@ public:
     {
         if (source == MemorySource::Malloc && m_IsImportEnabled)
         {
-            // Checks the 16 byte memory alignment
-            constexpr uintptr_t alignment = sizeof(size_t);
-            if (reinterpret_cast<uintptr_t>(memory) % alignment)
+            if (reinterpret_cast<uintptr_t>(memory) % m_TypeAlignment)
             {
                 throw MemoryImportException("NeonTensorHandle::Import Attempting to import unaligned memory");
             }
@@ -263,6 +264,7 @@ private:
     MemorySourceFlags m_ImportFlags;
     bool m_Imported;
     bool m_IsImportEnabled;
+    const uintptr_t m_TypeAlignment;
 };
 
 class NeonSubTensorHandle : public IAclTensorHandle
diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp
index 6114ce6218..da04f22d93 100644
--- a/src/backends/reference/RefBackend.hpp
+++ b/src/backends/reference/RefBackend.hpp
@@ -16,7 +16,7 @@ const BackendCapabilities cpuRefCapabilities("CpuRef",
                                              {"ProtectedContentAllocation", false},
                                              {"ConstantTensorsAsInputs", true},
                                              {"PreImportIOTensors", true},
-                                             {"ExternallyManagedMemory", false},
+                                             {"ExternallyManagedMemory", true},
                                              {"MultiAxisPacking", false},
                                              {"SingleAxisPacking", true}
                                             });
diff --git a/src/backends/reference/RefTensorHandle.cpp b/src/backends/reference/RefTensorHandle.cpp
index b9e566eace..5229e9d62b 100644
--- a/src/backends/reference/RefTensorHandle.cpp
+++ b/src/backends/reference/RefTensorHandle.cpp
@@ -122,7 +122,7 @@ bool RefTensorHandle::Import(void* memory, MemorySource source)
     if (m_IsImportEnabled && source == MemorySource::Malloc)
     {
         // Check memory alignment
-        constexpr uintptr_t alignment = sizeof(size_t);
+        uintptr_t alignment = GetDataTypeSize(m_TensorInfo.GetDataType());
         if (reinterpret_cast<uintptr_t>(memory) % alignment)
         {
             if (m_Imported)
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 75008bc866..36dcd21d32 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -113,10 +113,14 @@ bool RefWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
 std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                       const bool isMemoryManaged) const
 {
-    // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer
-    // to unmanaged memory. This also ensures memory alignment.
-    IgnoreUnused(isMemoryManaged);
-    return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
+    if (isMemoryManaged)
+    {
+        return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
+    }
+    else
+    {
+        return std::make_unique<RefTensorHandle>(tensorInfo, static_cast<MemorySourceFlags>(MemorySource::Malloc));
+    }
 }
 
 std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
@@ -126,7 +130,15 @@ std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const Tens
     // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer
     // to unmanaged memory. This also ensures memory alignment.
     IgnoreUnused(isMemoryManaged, dataLayout);
-    return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
+
+    if (isMemoryManaged)
+    {
+        return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
+    }
+    else
+    {
+        return std::make_unique<RefTensorHandle>(tensorInfo, static_cast<MemorySourceFlags>(MemorySource::Malloc));
+    }
 }
 
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
--
cgit v1.2.1
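
The MemoryManagerTests.cpp changes above exercise the post-patch MemoryManager API end to end: StoreMemToAllocate hands buffer descriptions and an allocator to the manager, Allocate binds each TensorMemory::m_Data into the allocated block, and Deallocate releases it. The sketch below drives the same sequence outside the test harness. It is an illustration against the API shown in this diff, not part of the patch: HeapAllocator is a hypothetical stand-in for a backend's ICustomAllocator, and the include paths are assumed from the tree layout in the diff.

#include <armnn/backends/ICustomAllocator.hpp>
#include <backendsCommon/MemoryManager.hpp> // assumed path

#include <cstddef>
#include <memory>
#include <vector>

// Hypothetical allocator backed by the global heap, mirroring DefaultAllocator.
class HeapAllocator : public armnn::ICustomAllocator
{
public:
    void* allocate(size_t size, size_t alignment) override
    {
        (void)alignment; // MemoryManager currently ignores typeAlignment as well
        return ::operator new(size);
    }

    void free(void* ptr) override
    {
        ::operator delete(ptr);
    }

    armnn::MemorySource GetMemorySourceType() override
    {
        return armnn::MemorySource::Malloc;
    }
};

int main()
{
    using namespace armnn;

    // One 8-byte buffer holding two tensors 4 bytes apart.
    // TensorMemory member order after this patch: m_Offset, m_OutputSlotId, m_Data.
    auto tensor0 = std::make_shared<TensorMemory>(TensorMemory{0, 0, nullptr});
    auto tensor1 = std::make_shared<TensorMemory>(TensorMemory{4, 1, nullptr});

    std::vector<BufferStorage> bufferStorageVector;
    bufferStorageVector.emplace_back(BufferStorage{{tensor0, tensor1}, 8});

    MemoryManager memoryManager;
    memoryManager.StoreMemToAllocate(bufferStorageVector, std::make_shared<HeapAllocator>());

    memoryManager.Allocate();   // tensor0->m_Data and tensor1->m_Data now point into one block
    memoryManager.Deallocate(); // frees the block through the same allocator
    return 0;
}

Holding the TensorMemory entries as shared pointers (the second bullet of the commit message) lets the manager and the code that later reads m_Data share ownership of the same records, instead of coordinating the lifetime of raw pointers into a vector that StoreMemToAllocate has already copied away.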
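
Both tensor-handle changes above relax the same import precondition: instead of requiring imported pointers to be sizeof(size_t)-aligned, Neon and Ref now require alignment to the tensor's element size, captured once as m_TypeAlignment (Neon) or computed on demand via GetDataTypeSize (Ref). Reduced to a sketch, with IsAlignedForImport as an illustrative helper rather than a function from the patch:

#include <cstdint>

// Import() accepts the pointer only if it is aligned to the element size of
// the tensor's data type, e.g. 1 for QAsymmU8, 2 for Float16, 4 for Float32.
bool IsAlignedForImport(const void* memory, uintptr_t typeAlignment)
{
    return (reinterpret_cast<uintptr_t>(memory) % typeAlignment) == 0;
}

In practice this admits externally managed buffers of small data types that are element-aligned but not pointer-size aligned, which the old fixed check would have rejected.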