diff options
-rw-r--r-- | include/armnn/BackendRegistry.hpp | 5 | ||||
-rw-r--r-- | include/armnn/IRuntime.hpp | 20 | ||||
-rw-r--r-- | include/armnn/backends/IBackendInternal.hpp | 5 | ||||
-rw-r--r-- | include/armnn/backends/ICustomAllocator.hpp | 18 | ||||
-rw-r--r-- | samples/CMakeLists.txt | 5 | ||||
-rw-r--r-- | samples/CustomMemoryAllocatorSample.cpp | 175 | ||||
-rw-r--r-- | src/armnn/BackendRegistry.cpp | 21 | ||||
-rw-r--r-- | src/armnn/Runtime.cpp | 64 | ||||
-rw-r--r-- | src/armnn/test/OptimizerTests.cpp | 133 | ||||
-rw-r--r-- | src/backends/aclCommon/BaseMemoryManager.cpp | 2 | ||||
-rw-r--r-- | src/backends/aclCommon/BaseMemoryManager.hpp | 8 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/CompatibilityTests.cpp | 4 | ||||
-rw-r--r-- | src/backends/cl/ClBackend.cpp | 62 | ||||
-rw-r--r-- | src/backends/cl/ClBackend.hpp | 128 | ||||
-rw-r--r-- | src/backends/cl/ClImportTensorHandle.hpp | 10 | ||||
-rw-r--r-- | src/backends/cl/ClRegistryInitializer.cpp | 8 | ||||
-rw-r--r-- | src/backends/cl/test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/backends/cl/test/ClCustomAllocatorTests.cpp | 160 | ||||
-rw-r--r-- | src/backends/cl/test/ClImportTensorHandleTests.cpp | 2 |
19 files changed, 658 insertions, 173 deletions
diff --git a/include/armnn/BackendRegistry.hpp b/include/armnn/BackendRegistry.hpp index fe6451cde0..c13aa9f8b6 100644 --- a/include/armnn/BackendRegistry.hpp +++ b/include/armnn/BackendRegistry.hpp @@ -7,6 +7,7 @@ #include <armnn/Types.hpp> #include <armnn/BackendId.hpp> #include <armnn/Optional.hpp> +#include <armnn/backends/ICustomAllocator.hpp> #include <memory> #include <unordered_map> @@ -35,6 +36,8 @@ public: BackendIdSet GetBackendIds() const; std::string GetBackendIdsAsString() const; void SetProfilingService(armnn::Optional<profiling::ProfilingService&> profilingService); + void RegisterAllocator(const BackendId& id, std::shared_ptr<ICustomAllocator> alloc); + std::unordered_map<BackendId, std::shared_ptr<ICustomAllocator>> GetAllocators(); BackendRegistry() {} virtual ~BackendRegistry() {} @@ -50,6 +53,7 @@ public: }; void Deregister(const BackendId& id); + void DeregisterAllocator(const BackendId &id); protected: using FactoryStorage = std::unordered_map<BackendId, FactoryFunction>; @@ -63,6 +67,7 @@ private: FactoryStorage m_Factories; armnn::Optional<profiling::ProfilingService&> m_ProfilingService; + std::unordered_map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomMemoryAllocatorMap; }; BackendRegistry& BackendRegistryInstance(); diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp index 8c269dee49..97a9c2889e 100644 --- a/include/armnn/IRuntime.hpp +++ b/include/armnn/IRuntime.hpp @@ -16,6 +16,7 @@ #include <armnn/backends/ICustomAllocator.hpp> #include <memory> +#include <map> namespace armnn { @@ -103,8 +104,8 @@ public: : m_GpuAccTunedParameters(nullptr) , m_EnableGpuProfiling(false) , m_DynamicBackendsPath("") - , m_CustomAllocator(nullptr) , m_ProtectedMode(false) + , m_CustomAllocatorMap() {} /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads. 
@@ -118,17 +119,22 @@ public: /// Only a single path is allowed for the override std::string m_DynamicBackendsPath; - /// A Custom Allocator used for allocation of working memory in the backends. - /// Set this for when you need to allocate Protected Working Memory, required for ProtectedMode - /// Only supported for GpuAcc - ICustomAllocator* m_CustomAllocator; - /// Setting this flag will allow the user to create the Runtime in protected mode. /// It will run all the inferences on protected memory and will make sure that /// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option - /// This will use Protected Memory Allocator associated with the backend + /// This requires that the backend supports Protected Memory and has an allocator capable of + /// allocating Protected Memory associated with it. bool m_ProtectedMode; + /// @brief A map to define a custom memory allocator for specific backend Ids. + /// + /// @details A Custom Allocator is used for allocation of working memory in the backends. + /// Set this if you need to take control of how memory is allocated on a backend. 
Required for + /// Protected Mode in order to correctly allocate Protected Memory + /// + /// @note Only supported for GpuAcc + std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap; + struct ExternalProfilingOptions { ExternalProfilingOptions() diff --git a/include/armnn/backends/IBackendInternal.hpp b/include/armnn/backends/IBackendInternal.hpp index 3b4ef95703..626746465f 100644 --- a/include/armnn/backends/IBackendInternal.hpp +++ b/include/armnn/backends/IBackendInternal.hpp @@ -199,10 +199,13 @@ public: /// Signals the backend to use a custom memory allocator provided by the user /// + /// \param allocator - a pointer to the provided ICustomAllocator to use with this backend /// \param errMsg - Optional string variable to return error messages /// \return - Returns true if switching to custom allocator was successful - virtual bool UseCustomMemoryAllocator(armnn::Optional<std::string&> errMsg) + virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator, + armnn::Optional<std::string&> errMsg) { + IgnoreUnused(allocator); if (errMsg) { std::stringstream message; diff --git a/include/armnn/backends/ICustomAllocator.hpp b/include/armnn/backends/ICustomAllocator.hpp index 1d4df0cb86..92cbcc2641 100644 --- a/include/armnn/backends/ICustomAllocator.hpp +++ b/include/armnn/backends/ICustomAllocator.hpp @@ -7,6 +7,7 @@ #include <cstddef> #include <memory> +#include <armnn/MemorySources.hpp> namespace armnn { @@ -23,13 +24,20 @@ public: * @param[in] alignment Alignment that the returned pointer should comply with * * @return A pointer to the allocated memory + * The returned pointer must be host write accessible */ - virtual void *allocate(size_t size, size_t alignment) = 0; - /** Interface to be implemented by the child class to free the allocated tensor */ - virtual void free(void *ptr) = 0; + virtual void* allocate(size_t size, size_t alignment) = 0; - // Utility Function to define the Custom Memory Allocators 
capabilities - virtual bool SupportsProtectedMemory() = 0; + /** Interface to be implemented by the child class to free the allocated bytes */ + virtual void free(void* ptr) = 0; + + // Used to specify what type of memory is being allocated by this allocator. + // Supported types are: + // MemorySource::Malloc + // Unsupported types are: + // MemorySource::DmaBuf + // MemorySource::DmaBufProtected + virtual armnn::MemorySource GetMemorySourceType() = 0; }; } // namespace armnn
\ No newline at end of file diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index ff45eecbe0..7be6a69369 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -8,3 +8,8 @@ if(BUILD_SAMPLE_APP AND SAMPLE_DYNAMIC_BACKEND) target_link_libraries(DynamicSample armnn ${CMAKE_THREAD_LIBS_INIT}) endif() +if(BUILD_SAMPLE_APP AND ARMCOMPUTECL) + add_executable(CustomMemoryAllocatorSample CustomMemoryAllocatorSample.cpp) + target_link_libraries(CustomMemoryAllocatorSample armnn ${CMAKE_THREAD_LIBS_INIT}) +endif() + diff --git a/samples/CustomMemoryAllocatorSample.cpp b/samples/CustomMemoryAllocatorSample.cpp new file mode 100644 index 0000000000..51b3c81079 --- /dev/null +++ b/samples/CustomMemoryAllocatorSample.cpp @@ -0,0 +1,175 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include <armnn/ArmNN.hpp> +#include <armnn/backends/ICustomAllocator.hpp> + +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <arm_compute/runtime/CL/CLScheduler.h> + +#include <iostream> + +/** Sample implementation of ICustomAllocator for use with the ClBackend. + * Note: any memory allocated must be host addressable with write access + * in order for ArmNN to be able to properly use it. 
*/ +class SampleClBackendCustomAllocator : public armnn::ICustomAllocator +{ +public: + SampleClBackendCustomAllocator() = default; + + void* allocate(size_t size, size_t alignment) + { + // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment + if (alignment == 0) + { + alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); + } + size_t space = size + alignment + alignment; + auto allocatedMemPtr = std::malloc(space * sizeof(size_t)); + + if (std::align(alignment, size, allocatedMemPtr, space) == nullptr) + { + throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed"); + } + return allocatedMemPtr; + } + + void free(void* ptr) + { + std::free(ptr); + } + + armnn::MemorySource GetMemorySourceType() + { + return armnn::MemorySource::Malloc; + } +}; + + +// A simple example application to show the usage of a custom memory allocator. In this sample, the users single +// input number is multiplied by 1.0f using a fully connected layer with a single neuron to produce an output +// number that is the same as the input. All memory required to execute this mini network is allocated with +// the provided custom allocator. +// +// Using a Custom Allocator is required for use with Protected Mode and Protected Memory. +// This example is provided using only unprotected malloc as Protected Memory is platform +// and implementation specific. +// +// Note: This example is similar to the SimpleSample application that can also be found in armnn/samples. +// The differences are in the use of a custom allocator, the backend is GpuAcc, and the inputs/outputs +// are being imported instead of copied. (Import must be enabled when using a Custom Allocator) +// You might find this useful for comparison. 
+int main() +{ + using namespace armnn; + + float number; + std::cout << "Please enter a number: " << std::endl; + std::cin >> number; + + // Turn on logging to standard output + // This is useful in this sample so that users can learn more about what is going on + armnn::ConfigureLogging(true, false, LogSeverity::Info); + + // Construct ArmNN network + armnn::NetworkId networkIdentifier; + INetworkPtr myNetwork = INetwork::Create(); + armnn::FullyConnectedDescriptor fullyConnectedDesc; + float weightsData[] = {1.0f}; // Identity + TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32); + weightsInfo.SetConstant(true); + armnn::ConstTensor weights(weightsInfo, weightsData); + ARMNN_NO_DEPRECATE_WARN_BEGIN + IConnectableLayer *fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc, + weights, + EmptyOptional(), + "fully connected"); + ARMNN_NO_DEPRECATE_WARN_END + IConnectableLayer *InputLayer = myNetwork->AddInputLayer(0); + IConnectableLayer *OutputLayer = myNetwork->AddOutputLayer(0); + InputLayer->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0)); + fullyConnected->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0)); + + // Create ArmNN runtime: + // + // This is the interesting bit when executing a model with a custom allocator. + // You can have different allocators for different backends. To support this + // the runtime creation option has a map that takes a BackendId and the corresponding + // allocator that should be used for that backend. 
+ // Only GpuAcc supports a Custom Allocator for now + // + // Note: This is not covered in this example but if you want to run a model on + // protected memory a custom allocator needs to be provided that supports + // protected memory allocations and the MemorySource of that allocator is + // set to MemorySource::DmaBufProtected + IRuntime::CreationOptions options; + auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>(); + options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; + IRuntimePtr runtime = IRuntime::Create(options); + + //Set the tensors in the network. + TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32); + InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + unsigned int numElements = inputTensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32); + fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // Optimise ArmNN network + OptimizerOptions optOptions; + optOptions.m_ImportEnabled = true; + armnn::IOptimizedNetworkPtr optNet = + Optimize(*myNetwork, {"GpuAcc"}, runtime->GetDeviceSpec(), optOptions); + if (!optNet) + { + // This shouldn't happen for this simple sample, with GpuAcc backend. + // But in general usage Optimize could fail if the backend at runtime cannot + // support the model that has been provided. + std::cerr << "Error: Failed to optimise the input network." 
<< std::endl; + return 1; + } + + // Load graph into runtime + std::string ignoredErrorMessage; + INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); + runtime->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + const size_t alignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); + + void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment); + + // Input with negative values + auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr); + std::fill_n(inputPtr, numElements, number); + + void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment); + auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr); + std::fill_n(outputPtr, numElements, -10.0f); + + + armnn::InputTensors inputTensors + { + {0, armnn::ConstTensor(runtime->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)}, + }; + armnn::OutputTensors outputTensors + { + {0, armnn::Tensor(runtime->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)} + }; + + // Execute network + runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors); + + // Tell the CLBackend to sync memory so we can read the output. 
+ arm_compute::CLScheduler::get().sync(); + auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr); + std::cout << "Your number was " << outputResult[0] << std::endl; + runtime->UnloadNetwork(networkIdentifier); + return 0; + +} diff --git a/src/armnn/BackendRegistry.cpp b/src/armnn/BackendRegistry.cpp index ff63c8236a..80daed9896 100644 --- a/src/armnn/BackendRegistry.cpp +++ b/src/armnn/BackendRegistry.cpp @@ -39,6 +39,7 @@ void BackendRegistry::Register(const BackendId& id, BackendRegistry::FactoryFunc void BackendRegistry::Deregister(const BackendId& id) { m_Factories.erase(id); + DeregisterAllocator(id); if (m_ProfilingService.has_value() && m_ProfilingService.value().IsProfilingEnabled()) { @@ -106,5 +107,25 @@ void BackendRegistry::SetProfilingService(armnn::Optional<profiling::ProfilingSe m_ProfilingService = profilingService; } +void BackendRegistry::RegisterAllocator(const BackendId& id, std::shared_ptr<ICustomAllocator> alloc) +{ + if (m_CustomMemoryAllocatorMap.find(id) != m_CustomMemoryAllocatorMap.end()) + { + throw InvalidArgumentException( + std::string(id) + " already has an allocator associated with it", + CHECK_LOCATION()); + } + m_CustomMemoryAllocatorMap[id] = alloc; +} + +void BackendRegistry::DeregisterAllocator(const BackendId& id) +{ + m_CustomMemoryAllocatorMap.erase(id); +} + +std::unordered_map<BackendId, std::shared_ptr<ICustomAllocator>> BackendRegistry::GetAllocators() +{ + return m_CustomMemoryAllocatorMap; +} } // namespace armnn diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index c2b748653d..9fe58287c3 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -130,7 +130,8 @@ Status RuntimeImpl::LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr inNetwork, std::string& errorMessage) { - INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined); + INetworkProperties networkProperties( + false, MemorySource::Undefined, MemorySource::Undefined); return 
LoadNetwork(networkIdOut, std::move(inNetwork), errorMessage, networkProperties); } @@ -267,7 +268,8 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) if ( options.m_ProfilingOptions.m_TimelineEnabled && !options.m_ProfilingOptions.m_EnableProfiling ) { - throw RuntimeException("It is not possible to enable timeline reporting without profiling being enabled"); + throw RuntimeException( + "It is not possible to enable timeline reporting without profiling being enabled"); } // Load any available/compatible dynamic backend before the runtime @@ -283,6 +285,8 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) auto backend = factoryFun(); ARMNN_ASSERT(backend.get() != nullptr); + auto customAllocatorMapIterator = options.m_CustomAllocatorMap.find(id); + // If the runtime is created in protected mode only add backends that support this mode if (options.m_ProtectedMode) { @@ -298,17 +302,61 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) << " is not registered as does not support protected content allocation \n"; continue; } - std::string err; - if (!backend->UseCustomMemoryAllocator(err)) + // The user is responsible to provide a custom memory allocator which allows to allocate + // protected memory + if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end()) { - ARMNN_LOG(error) << "The backend " + std::string err; + if (customAllocatorMapIterator->second->GetMemorySourceType() + == armnn::MemorySource::DmaBufProtected) + { + if (!backend->UseCustomMemoryAllocator(customAllocatorMapIterator->second, err)) + { + ARMNN_LOG(error) << "The backend " + << id + << " reported an error when entering protected mode. Backend won't be" + << " used. 
ErrorMsg: " << err; + continue; + } + // No errors so register the Custom Allocator with the BackendRegistry + BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second); + } + else + { + ARMNN_LOG(error) << "The CustomAllocator provided with the runtime options doesn't support " + "protected memory. Protected mode can't be activated. The backend " << id - << " reported an error when entering protected mode. Backend won't be used." - << " ErrorMsg: " << err; + << " is not going to be used. MemorySource must be MemorySource::DmaBufProtected"; + continue; + } + } + else + { + ARMNN_LOG(error) << "Protected mode can't be activated for backend: " + << id + << " no custom allocator was provided to the runtime options."; continue; } } - + else + { + // If a custom memory allocator is provided make the backend use that instead of the default + if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end()) + { + std::string err; + if (!backend->UseCustomMemoryAllocator(customAllocatorMapIterator->second, err)) + { + ARMNN_LOG(error) << "The backend " + << id + << " reported an error when trying to use the provided custom allocator." + " Backend won't be used." 
+ << " ErrorMsg: " << err; + continue; + } + // No errors so register the Custom Allocator with the BackendRegistry + BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second); + } + } auto context = backend->CreateBackendContext(options); // backends are allowed to return nullptrs if they diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index 19bd58193a..38aef671d2 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -220,9 +220,10 @@ public: return m_BackendCapabilities; }; - virtual bool UseCustomMemoryAllocator(armnn::Optional<std::string&> errMsg) override + virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator, + armnn::Optional<std::string&> errMsg) override { - IgnoreUnused(errMsg); + IgnoreUnused(errMsg, allocator); m_CustomAllocator = true; return m_CustomAllocator; } @@ -925,131 +926,3 @@ TEST_CASE("OptimizeForExclusiveConnectionsWithoutFuseTest") &IsLayerOfType<armnn::OutputLayer>)); } } // Optimizer TestSuite - -TEST_SUITE("Runtime") -{ -// This test really belongs into RuntimeTests.cpp but it requires all sort of MockBackends which are -// already defined here -TEST_CASE("RuntimeProtectedModeOption") -{ - using namespace armnn; - - struct MockPolicy - { - static const BackendId& GetIdStatic() - { - static BackendId id = "MockBackend"; - return id; - } - }; - - struct ProtectedPolicy - { - static const BackendId& GetIdStatic() - { - static BackendId id = "MockBackendProtectedContent"; - return id; - } - }; - - struct SillyPolicy - { - static const BackendId& GetIdStatic() - { - static BackendId id = "SillyMockBackend"; - return id; - } - }; - - BackendCapabilities mockBackendCapabilities("MockBackend", - { - {"ProtectedContentAllocation", false} - }); - BackendCapabilities mockProtectedBackendCapabilities("MockBackendProtectedContent", - { - {"ProtectedContentAllocation", true} - }); - - auto& backendRegistry = 
BackendRegistryInstance(); - - // clean up from previous test runs - std::vector<BackendId> mockBackends = {"MockBackend", "MockBackendProtectedContent", "SillyMockBackend"}; - for (auto& backend : mockBackends) - { - backendRegistry.Deregister(backend); - } - - // Create a bunch of MockBackends with different capabilities - // 1. Doesn't support protected mode even though it knows about this capability - backendRegistry.Register("MockBackend", [mockBackendCapabilities]() - { - return std::make_unique<MockBackend<MockPolicy>>(mockBackendCapabilities); - }); - // 2. Supports protected mode and has it implemented correctly - backendRegistry.Register("MockBackendProtectedContent", [mockProtectedBackendCapabilities]() - { - return std::make_unique<MockBackend<ProtectedPolicy>>(mockProtectedBackendCapabilities); - }); - // 3. Claims to support protected mode but doesn't have the UseCustomMemoryAllocator function implemented - backendRegistry.Register("SillyMockBackend", [mockProtectedBackendCapabilities]() - { - return std::make_unique<NoProtectedModeMockBackend<SillyPolicy>>(mockProtectedBackendCapabilities); - }); - - // Creates a runtime that is not in protected mode - { - IRuntime::CreationOptions creationOptions; - creationOptions.m_ProtectedMode = false; - - IRuntimePtr run = IRuntime::Create(creationOptions); - - const armnn::BackendIdSet supportedDevices = run->GetDeviceSpec().GetSupportedBackends(); - // Both MockBackends that are registered should show up in the runtimes supported backends list - for (auto& backend : mockBackends) - { - CHECK(std::find(supportedDevices.cbegin(), supportedDevices.cend(), backend) != supportedDevices.cend()); - } - } - - // If the runtime is in protected mode only backends that support protected content should be added - { - IRuntime::CreationOptions creationOptions; - creationOptions.m_ProtectedMode = true; - - IRuntimePtr run = IRuntime::Create(creationOptions); - - const armnn::BackendIdSet supportedDevices = 
run->GetDeviceSpec().GetSupportedBackends(); - // Only the MockBackends that claims support for protected content should show up in the - // runtimes supported backends list - CHECK(std::find(supportedDevices.cbegin(), - supportedDevices.cend(), - "MockBackendProtectedContent") != supportedDevices.cend()); - CHECK(std::find(supportedDevices.cbegin(), - supportedDevices.cend(), - "MockBackend") == supportedDevices.cend()); - CHECK(std::find(supportedDevices.cbegin(), - supportedDevices.cend(), - "SillyMockBackend") == supportedDevices.cend()); - } - - // If the runtime is in protected mode only backends that support protected content should be added - { - IRuntime::CreationOptions creationOptions; - creationOptions.m_ProtectedMode = true; - - IRuntimePtr run = IRuntime::Create(creationOptions); - - const armnn::BackendIdSet supportedDevices = run->GetDeviceSpec().GetSupportedBackends(); - // Only the MockBackend that claims support for protected content should show up in the - // runtimes supported backends list - CHECK(std::find(supportedDevices.cbegin(), - supportedDevices.cend(), - "MockBackendProtectedContent") != supportedDevices.cend()); - - CHECK(std::find(supportedDevices.cbegin(), - supportedDevices.cend(), - "MockBackend") == supportedDevices.cend()); - } - -} -} diff --git a/src/backends/aclCommon/BaseMemoryManager.cpp b/src/backends/aclCommon/BaseMemoryManager.cpp index 45e0480a84..c60a4a04ae 100644 --- a/src/backends/aclCommon/BaseMemoryManager.cpp +++ b/src/backends/aclCommon/BaseMemoryManager.cpp @@ -15,7 +15,7 @@ namespace armnn { #if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED) -BaseMemoryManager::BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc, +BaseMemoryManager::BaseMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity) { ARMNN_ASSERT(alloc); diff --git a/src/backends/aclCommon/BaseMemoryManager.hpp b/src/backends/aclCommon/BaseMemoryManager.hpp index 
e80abf0edd..e3ffd188a1 100644
--- a/src/backends/aclCommon/BaseMemoryManager.hpp
+++ b/src/backends/aclCommon/BaseMemoryManager.hpp
@@ -15,6 +15,7 @@
 #include <arm_compute/runtime/IAllocator.h>
 #include <arm_compute/runtime/IMemoryGroup.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
+#include <arm_compute/runtime/CL/CLTensorAllocator.h>
 #endif
 
 namespace armnn
@@ -36,14 +37,14 @@ public:
     void Release() override;
 
 #if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-    BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
+    BaseMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
 
     std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetIntraLayerManager() { return m_IntraLayerMemoryMgr; }
     std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetInterLayerManager() { return m_InterLayerMemoryMgr; }
     std::shared_ptr<arm_compute::IMemoryGroup>& GetInterLayerMemoryGroup() { return m_InterLayerMemoryGroup; }
 
 protected:
-    std::unique_ptr<arm_compute::IAllocator> m_Allocator;
+    std::shared_ptr<arm_compute::IAllocator> m_Allocator;
     std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_IntraLayerMemoryMgr;
     std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_InterLayerMemoryMgr;
     std::shared_ptr<arm_compute::IMemoryGroup> m_InterLayerMemoryGroup;
@@ -81,9 +82,10 @@ public:
     ClMemoryManager() {}
     virtual ~ClMemoryManager() {}
 
-    ClMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc)
+    ClMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc)
     : BaseMemoryManager(std::move(alloc), MemoryAffinity::Buffer)
     {
+        // NOTE(review): alloc has already been moved into the by-value BaseMemoryManager argument in the
+        // initialiser list above, so alloc.get() is expected to be nullptr here and the call below passes a
+        // null pointer to set_global_allocator. If installing the allocator globally is intended, the live
+        // pointer is presumably m_Allocator.get() (TODO confirm). Only a raw pointer is handed over, so
+        // confirm how long it has to stay valid relative to this manager before changing the call.
+        arm_compute::CLTensorAllocator::set_global_allocator(alloc.get());
         m_InterLayerMemoryGroup = CreateMemoryGroup(m_InterLayerMemoryMgr);
     }
 
diff --git a/src/backends/backendsCommon/test/CompatibilityTests.cpp b/src/backends/backendsCommon/test/CompatibilityTests.cpp
index 12cb5e9956..34baad9d0c 100644
--- 
a/src/backends/backendsCommon/test/CompatibilityTests.cpp +++ b/src/backends/backendsCommon/test/CompatibilityTests.cpp @@ -3,8 +3,12 @@ // SPDX-License-Identifier: MIT // +#if defined(ARMCOMPUTECL_ENABLED) #include <cl/ClBackend.hpp> +#endif +#if defined(ARMCOMPUTENEON_ENABLED) #include <neon/NeonBackend.hpp> +#endif #include <reference/RefBackend.hpp> #include <armnn/BackendHelper.hpp> diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index f1e52c1998..b85232e75c 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -49,6 +49,10 @@ const BackendId& ClBackend::GetIdStatic() IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const { + if (m_UsingCustomAllocator) + { + return std::make_unique<ClMemoryManager>(m_CustomAllocator); + } return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); } @@ -69,7 +73,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( TensorHandleFactoryRegistry& registry) const { - auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + std::shared_ptr<ClMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } registry.RegisterMemoryManager(memoryManager); registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager)); @@ -83,7 +95,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const { - auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + 
std::shared_ptr<ClMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } registry.RegisterMemoryManager(memoryManager); registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager)); @@ -100,7 +120,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( MemorySourceFlags inputFlags, MemorySourceFlags outputFlags) const { - auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + std::shared_ptr<ClMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } registry.RegisterMemoryManager(memoryManager); registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager)); @@ -118,10 +146,18 @@ std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferen void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) { - auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + std::shared_ptr<ClMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } - registry.RegisterMemoryManager(mgr); - registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr)); + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager)); registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>( 
static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc))); } @@ -130,10 +166,18 @@ void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& regis MemorySourceFlags inputFlags, MemorySourceFlags outputFlags) { - auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + std::shared_ptr<ClMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } - registry.RegisterMemoryManager(mgr); - registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr)); + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager)); registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags)); } diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp index c742c0b204..c63bd25c56 100644 --- a/src/backends/cl/ClBackend.hpp +++ b/src/backends/cl/ClBackend.hpp @@ -6,6 +6,15 @@ #include <armnn/backends/IBackendInternal.hpp> +#include <arm_compute/core/Types.h> +#include <arm_compute/runtime/CL/CLBufferAllocator.h> + +#include <aclCommon/BaseMemoryManager.hpp> +#include <arm_compute/runtime/CL/CLMemoryRegion.h> + +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <CL/cl_ext.h> + namespace armnn { @@ -20,7 +29,12 @@ const BackendCapabilities gpuAccCapabilities("GpuAcc", class ClBackend : public IBackendInternal { public: - ClBackend() : m_EnableCustomAllocator(false) {}; + ClBackend() : m_CustomAllocator(nullptr) {}; + ClBackend(std::shared_ptr<ICustomAllocator> allocator) + { + std::string err; + UseCustomMemoryAllocator(allocator, err); + } ~ClBackend() = default; static const BackendId& GetIdStatic(); @@ -72,17 +86,119 @@ public: return 
gpuAccCapabilities; }; - virtual bool UseCustomMemoryAllocator(armnn::Optional<std::string&> errMsg) override + virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator, + armnn::Optional<std::string&> errMsg) override { IgnoreUnused(errMsg); + ARMNN_LOG(info) << "Using Custom Allocator for ClBackend"; // Set flag to signal the backend to use a custom memory allocator - m_EnableCustomAllocator = true; - - return m_EnableCustomAllocator; + m_CustomAllocator = std::make_shared<ClBackendCustomAllocatorWrapper>(std::move(allocator)); + m_UsingCustomAllocator = true; + return m_UsingCustomAllocator; } - bool m_EnableCustomAllocator; + // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this + class ClBackendCustomAllocatorWrapper : public arm_compute::IAllocator + { + public: + ClBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc) + {} + // Inherited methods overridden: + void* allocate(size_t size, size_t alignment) override + { + auto alloc = m_CustomAllocator->allocate(size, alignment); + return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType()); + } + void free(void* ptr) override + { + auto hostMemPtr = m_AllocatedBufferMappings[ptr]; + clReleaseMemObject(static_cast<cl_mem>(ptr)); + m_CustomAllocator->free(hostMemPtr); + } + std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override + { + auto hostMemPtr = m_CustomAllocator->allocate(size, alignment); + cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType()); + + return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer), hostMemPtr); + } + private: + cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source) + { + // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE + auto cachelineAlignment = + 
arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); + auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment); + + if (source == MemorySource::Malloc) + { + const cl_import_properties_arm importProperties[] = + { + CL_IMPORT_TYPE_ARM, + CL_IMPORT_TYPE_HOST_ARM, + 0 + }; + cl_int error = CL_SUCCESS; + cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), + CL_MEM_READ_WRITE, + importProperties, + memory, + roundedSize, + &error); + if (error == CL_SUCCESS) + { + m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); + return buffer; + } + throw armnn::Exception( + "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error)); + } + throw armnn::Exception( + "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator"); + } + std::shared_ptr<ICustomAllocator> m_CustomAllocator; + std::map<void*, void*> m_AllocatedBufferMappings; + }; + + class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion + { + public: + // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access + ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr) + : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>()) + { + _mem = buffer; + m_HostMemPtr = hostMemPtr; + } + + // Inherited methods overridden : + void* ptr() override + { + return nullptr; + } + + void* map(cl::CommandQueue &q, bool blocking) override + { + armnn::IgnoreUnused(q, blocking); + if (m_HostMemPtr == nullptr) + { + throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr"); + } + _mapping = m_HostMemPtr; + return _mapping; + } + + void unmap(cl::CommandQueue &q) override + { + armnn::IgnoreUnused(q); + _mapping = nullptr; + } + void* m_HostMemPtr = nullptr; + }; + + std::shared_ptr<ClBackendCustomAllocatorWrapper> 
m_CustomAllocator; + bool m_UsingCustomAllocator = false; }; } // namespace armnn diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp index 3fca7cb127..69cd4a6d81 100644 --- a/src/backends/cl/ClImportTensorHandle.hpp +++ b/src/backends/cl/ClImportTensorHandle.hpp @@ -140,10 +140,16 @@ public: private: bool ClImport(const cl_import_properties_arm* importProperties, void* memory) { - const size_t totalBytes = m_Tensor.info()->total_size(); + size_t totalBytes = m_Tensor.info()->total_size(); + + // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE + auto cachelineAlignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); + auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment); + cl_int error = CL_SUCCESS; cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), - CL_MEM_READ_WRITE, importProperties, memory, totalBytes, &error); + CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error); if (error != CL_SUCCESS) { throw MemoryImportException("ClImportTensorHandle::Invalid imported memory" + std::to_string(error)); diff --git a/src/backends/cl/ClRegistryInitializer.cpp b/src/backends/cl/ClRegistryInitializer.cpp index 8decd6f689..aadc14bd68 100644 --- a/src/backends/cl/ClRegistryInitializer.cpp +++ b/src/backends/cl/ClRegistryInitializer.cpp @@ -18,6 +18,14 @@ static BackendRegistry::StaticRegistryInitializer g_RegisterHelper ClBackend::GetIdStatic(), []() { + // Check if we have a CustomMemoryAllocator associated with the backend + // and if so register it with the backend. 
+ auto customAllocators = BackendRegistryInstance().GetAllocators(); + auto allocatorIterator = customAllocators.find(ClBackend::GetIdStatic()); + if (allocatorIterator != customAllocators.end()) + { + return IBackendInternalUniquePtr(new ClBackend(allocatorIterator->second)); + } return IBackendInternalUniquePtr(new ClBackend); } }; diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt index 6662a1e659..41cbe24c15 100644 --- a/src/backends/cl/test/CMakeLists.txt +++ b/src/backends/cl/test/CMakeLists.txt @@ -6,6 +6,7 @@ list(APPEND armnnClBackendUnitTests_sources ClContextControlFixture.hpp ClContextSerializerTests.cpp + ClCustomAllocatorTests.cpp ClCreateWorkloadTests.cpp ClEndToEndTests.cpp ClImportTensorHandleFactoryTests.cpp diff --git a/src/backends/cl/test/ClCustomAllocatorTests.cpp b/src/backends/cl/test/ClCustomAllocatorTests.cpp new file mode 100644 index 0000000000..4d1a0e1cfb --- /dev/null +++ b/src/backends/cl/test/ClCustomAllocatorTests.cpp @@ -0,0 +1,160 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include <armnn/backends/ICustomAllocator.hpp> +#include <armnn/Descriptors.hpp> +#include <armnn/Exceptions.hpp> +#include <armnn/INetwork.hpp> +#include <armnn/IRuntime.hpp> +#include <armnn/Utils.hpp> +#include <armnn/BackendRegistry.hpp> +#include <cl/ClBackend.hpp> + +#include <doctest/doctest.h> + +// Contains the OpenCl interfaces for mapping memory in the Gpu Page Tables +// Requires the OpenCl backend to be included (GpuAcc) +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <CL/cl_ext.h> +#include <arm_compute/runtime/CL/CLScheduler.h> + + +/** Sample implementation of ICustomAllocator for use with the ClBackend. + * Note: any memory allocated must be host accessible with write access to allow for weights and biases + * to be passed in. Read access is not required.. 
*/ +class SampleClBackendCustomAllocator : public armnn::ICustomAllocator +{ +public: + SampleClBackendCustomAllocator() = default; + + void* allocate(size_t size, size_t alignment) + { + // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment + if (alignment == 0) + { + alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); + } + size_t space = size + alignment + alignment; + auto allocatedMemPtr = std::malloc(space * sizeof(size_t)); + + if (std::align(alignment, size, allocatedMemPtr, space) == nullptr) + { + throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed"); + } + return allocatedMemPtr; + } + + /** Interface to be implemented by the child class to free the allocated tensor */ + void free(void* ptr) + { + std::free(ptr); + } + + armnn::MemorySource GetMemorySourceType() + { + return armnn::MemorySource::Malloc; + } +}; + +TEST_SUITE("ClCustomAllocatorTests") +{ + +// This is a copy of the SimpleSample app modified to use a custom +// allocator for the clbackend. 
It creates a FullyConnected network with a single layer +// taking a single number as an input +TEST_CASE("ClCustomAllocatorTest") +{ + using namespace armnn; + + float number = 3; + + // Construct ArmNN network + armnn::NetworkId networkIdentifier; + INetworkPtr myNetwork = INetwork::Create(); + + armnn::FullyConnectedDescriptor fullyConnectedDesc; + float weightsData[] = {1.0f}; // Identity + TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32); + weightsInfo.SetConstant(true); + armnn::ConstTensor weights(weightsInfo, weightsData); + + ARMNN_NO_DEPRECATE_WARN_BEGIN + IConnectableLayer* fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc, + weights, + EmptyOptional(), + "fully connected"); + ARMNN_NO_DEPRECATE_WARN_END + IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0); + IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0); + InputLayer->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0)); + fullyConnected->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0)); + + + // Create ArmNN runtime + IRuntime::CreationOptions options; // default options + auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>(); + options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; + IRuntimePtr run = IRuntime::Create(options); + + //Set the tensors in the network. 
+ TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32); + InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32); + fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // Optimise ArmNN network + OptimizerOptions optOptions; + optOptions.m_ImportEnabled = true; + armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions); + CHECK(optNet); + + // Load graph into runtime + std::string ignoredErrorMessage; + INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); + run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + unsigned int numElements = inputTensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + const size_t alignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); + + void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment); + + // Input with negative values + auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr); + std::fill_n(inputPtr, numElements, number); + + void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment); + auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr); + std::fill_n(outputPtr, numElements, -10.0f); + + armnn::InputTensors inputTensors + { + {0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)}, + }; + armnn::OutputTensors outputTensors + { + {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)} + }; + + // Execute network + run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors); + run->UnloadNetwork(networkIdentifier); + + + // Tell the CLBackend to sync memory so we can read the output. 
+ arm_compute::CLScheduler::get().sync(); + auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr); + + run->UnloadNetwork(networkIdentifier); + CHECK(outputResult[0] == number); + auto& backendRegistry = armnn::BackendRegistryInstance(); + backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); +} + +} // test suite ClCustomAllocatorTests
\ No newline at end of file diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp index 931729a736..6b1d3521d5 100644 --- a/src/backends/cl/test/ClImportTensorHandleTests.cpp +++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp @@ -61,7 +61,7 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClMallocImport") // Validate result by checking that the output has no negative values for(unsigned int i = 0; i < numElements; ++i) { - CHECK(typedPtr[i] >= 0); + CHECK(typedPtr[i] == 0); } } |