aboutsummaryrefslogtreecommitdiff
path: root/src/backends
diff options
context:
space:
mode:
authorJan Eilers <jan.eilers@arm.com>2021-07-22 13:17:04 +0100
committerDavid Monahan <david.monahan@arm.com>2021-08-10 16:35:33 +0100
commitc1c872f12797ef6fe52c4589113e7efc353e56eb (patch)
tree911320c5306f9d2273ee76201806bfb12cbe4cd3 /src/backends
parentf487486c843a38fced90229923433d09f99fc2e5 (diff)
downloadarmnn-c1c872f12797ef6fe52c4589113e7efc353e56eb.tar.gz
Adds CustomAllocator interface and Sample App
* Updates the runtime options with a CustomAllocatorMap which allows a CustomAllocator to be defined for specific backends
* Changes the IBackendInternal interface to use a shared pointer to a custom allocator
* Updates ClBackend.hpp/cpp to use the CustomAllocator
* Adds an example application and unit test which use a CustomAllocator for GpuAcc
* Refactors the interface to use MemorySource instead of the user mapping cl_mem directly
* Modifies the BackendRegistry to also hold a registry of CustomAllocators
* BackendRegistry Deregister will also deregister any allocators associated with that backend id
* Calls set_global_allocator within the BaseMemoryManager so that it always matches the currently used allocator

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: I156d819686021865f4375e6cb7a5c5dec8fee9e8
Signed-off-by: David Monahan <david.monahan@arm.com>
Diffstat (limited to 'src/backends')
-rw-r--r--src/backends/aclCommon/BaseMemoryManager.cpp2
-rw-r--r--src/backends/aclCommon/BaseMemoryManager.hpp8
-rw-r--r--src/backends/backendsCommon/test/CompatibilityTests.cpp4
-rw-r--r--src/backends/cl/ClBackend.cpp62
-rw-r--r--src/backends/cl/ClBackend.hpp128
-rw-r--r--src/backends/cl/ClImportTensorHandle.hpp10
-rw-r--r--src/backends/cl/ClRegistryInitializer.cpp8
-rw-r--r--src/backends/cl/test/CMakeLists.txt1
-rw-r--r--src/backends/cl/test/ClCustomAllocatorTests.cpp160
-rw-r--r--src/backends/cl/test/ClImportTensorHandleTests.cpp2
10 files changed, 363 insertions, 22 deletions
diff --git a/src/backends/aclCommon/BaseMemoryManager.cpp b/src/backends/aclCommon/BaseMemoryManager.cpp
index 45e0480a84..c60a4a04ae 100644
--- a/src/backends/aclCommon/BaseMemoryManager.cpp
+++ b/src/backends/aclCommon/BaseMemoryManager.cpp
@@ -15,7 +15,7 @@ namespace armnn
{
#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
-BaseMemoryManager::BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc,
+BaseMemoryManager::BaseMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc,
MemoryAffinity memoryAffinity)
{
ARMNN_ASSERT(alloc);
diff --git a/src/backends/aclCommon/BaseMemoryManager.hpp b/src/backends/aclCommon/BaseMemoryManager.hpp
index e80abf0edd..e3ffd188a1 100644
--- a/src/backends/aclCommon/BaseMemoryManager.hpp
+++ b/src/backends/aclCommon/BaseMemoryManager.hpp
@@ -15,6 +15,7 @@
#include <arm_compute/runtime/IAllocator.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+#include <arm_compute/runtime/CL/CLTensorAllocator.h>
#endif
namespace armnn
@@ -36,14 +37,14 @@ public:
void Release() override;
#if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
- BaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
+ BaseMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc, MemoryAffinity memoryAffinity);
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetIntraLayerManager() { return m_IntraLayerMemoryMgr; }
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& GetInterLayerManager() { return m_InterLayerMemoryMgr; }
std::shared_ptr<arm_compute::IMemoryGroup>& GetInterLayerMemoryGroup() { return m_InterLayerMemoryGroup; }
protected:
- std::unique_ptr<arm_compute::IAllocator> m_Allocator;
+ std::shared_ptr<arm_compute::IAllocator> m_Allocator;
std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_IntraLayerMemoryMgr;
std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_InterLayerMemoryMgr;
std::shared_ptr<arm_compute::IMemoryGroup> m_InterLayerMemoryGroup;
@@ -81,9 +82,10 @@ public:
ClMemoryManager() {}
virtual ~ClMemoryManager() {}
- ClMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc)
+ ClMemoryManager(std::shared_ptr<arm_compute::IAllocator> alloc)
: BaseMemoryManager(std::move(alloc), MemoryAffinity::Buffer)
{
+ arm_compute::CLTensorAllocator::set_global_allocator(alloc.get());
m_InterLayerMemoryGroup = CreateMemoryGroup(m_InterLayerMemoryMgr);
}
diff --git a/src/backends/backendsCommon/test/CompatibilityTests.cpp b/src/backends/backendsCommon/test/CompatibilityTests.cpp
index 12cb5e9956..34baad9d0c 100644
--- a/src/backends/backendsCommon/test/CompatibilityTests.cpp
+++ b/src/backends/backendsCommon/test/CompatibilityTests.cpp
@@ -3,8 +3,12 @@
// SPDX-License-Identifier: MIT
//
+#if defined(ARMCOMPUTECL_ENABLED)
#include <cl/ClBackend.hpp>
+#endif
+#if defined(ARMCOMPUTENEON_ENABLED)
#include <neon/NeonBackend.hpp>
+#endif
#include <reference/RefBackend.hpp>
#include <armnn/BackendHelper.hpp>
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index f1e52c1998..b85232e75c 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -49,6 +49,10 @@ const BackendId& ClBackend::GetIdStatic()
IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
{
+ if (m_UsingCustomAllocator)
+ {
+ return std::make_unique<ClMemoryManager>(m_CustomAllocator);
+ }
return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
}
@@ -69,7 +73,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry) const
{
- auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
@@ -83,7 +95,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
{
- auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
@@ -100,7 +120,15 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
MemorySourceFlags inputFlags,
MemorySourceFlags outputFlags) const
{
- auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
registry.RegisterMemoryManager(memoryManager);
registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
@@ -118,10 +146,18 @@ std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferen
void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
- auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
- registry.RegisterMemoryManager(mgr);
- registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
}
@@ -130,10 +166,18 @@ void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& regis
MemorySourceFlags inputFlags,
MemorySourceFlags outputFlags)
{
- auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ std::shared_ptr<ClMemoryManager> memoryManager;
+ if (m_UsingCustomAllocator)
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
+ }
+ else
+ {
+ memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+ }
- registry.RegisterMemoryManager(mgr);
- registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
}
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index c742c0b204..c63bd25c56 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -6,6 +6,15 @@
#include <armnn/backends/IBackendInternal.hpp>
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+
+#include <aclCommon/BaseMemoryManager.hpp>
+#include <arm_compute/runtime/CL/CLMemoryRegion.h>
+
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <CL/cl_ext.h>
+
namespace armnn
{
@@ -20,7 +29,12 @@ const BackendCapabilities gpuAccCapabilities("GpuAcc",
class ClBackend : public IBackendInternal
{
public:
- ClBackend() : m_EnableCustomAllocator(false) {};
+ ClBackend() : m_CustomAllocator(nullptr) {};
+ ClBackend(std::shared_ptr<ICustomAllocator> allocator)
+ {
+ std::string err;
+ UseCustomMemoryAllocator(allocator, err);
+ }
~ClBackend() = default;
static const BackendId& GetIdStatic();
@@ -72,17 +86,119 @@ public:
return gpuAccCapabilities;
};
- virtual bool UseCustomMemoryAllocator(armnn::Optional<std::string&> errMsg) override
+ virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
+ armnn::Optional<std::string&> errMsg) override
{
IgnoreUnused(errMsg);
+ ARMNN_LOG(info) << "Using Custom Allocator for ClBackend";
// Set flag to signal the backend to use a custom memory allocator
- m_EnableCustomAllocator = true;
-
- return m_EnableCustomAllocator;
+ m_CustomAllocator = std::make_shared<ClBackendCustomAllocatorWrapper>(std::move(allocator));
+ m_UsingCustomAllocator = true;
+ return m_UsingCustomAllocator;
}
- bool m_EnableCustomAllocator;
+ // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this
+ class ClBackendCustomAllocatorWrapper : public arm_compute::IAllocator
+ {
+ public:
+ ClBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc)
+ {}
+ // Inherited methods overridden:
+ void* allocate(size_t size, size_t alignment) override
+ {
+ auto alloc = m_CustomAllocator->allocate(size, alignment);
+ return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
+ }
+ void free(void* ptr) override
+ {
+ auto hostMemPtr = m_AllocatedBufferMappings[ptr];
+ clReleaseMemObject(static_cast<cl_mem>(ptr));
+ m_CustomAllocator->free(hostMemPtr);
+ }
+ std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
+ {
+ auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
+ cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
+
+ return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer), hostMemPtr);
+ }
+ private:
+ cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source)
+ {
+ // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+ auto cachelineAlignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
+
+ if (source == MemorySource::Malloc)
+ {
+ const cl_import_properties_arm importProperties[] =
+ {
+ CL_IMPORT_TYPE_ARM,
+ CL_IMPORT_TYPE_HOST_ARM,
+ 0
+ };
+ cl_int error = CL_SUCCESS;
+ cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
+ CL_MEM_READ_WRITE,
+ importProperties,
+ memory,
+ roundedSize,
+ &error);
+ if (error == CL_SUCCESS)
+ {
+ m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
+ return buffer;
+ }
+ throw armnn::Exception(
+ "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
+ }
+ throw armnn::Exception(
+ "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
+ }
+ std::shared_ptr<ICustomAllocator> m_CustomAllocator;
+ std::map<void*, void*> m_AllocatedBufferMappings;
+ };
+
+ class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion
+ {
+ public:
+ // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access
+ ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr)
+ : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
+ {
+ _mem = buffer;
+ m_HostMemPtr = hostMemPtr;
+ }
+
+ // Inherited methods overridden :
+ void* ptr() override
+ {
+ return nullptr;
+ }
+
+ void* map(cl::CommandQueue &q, bool blocking) override
+ {
+ armnn::IgnoreUnused(q, blocking);
+ if (m_HostMemPtr == nullptr)
+ {
+ throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr");
+ }
+ _mapping = m_HostMemPtr;
+ return _mapping;
+ }
+
+ void unmap(cl::CommandQueue &q) override
+ {
+ armnn::IgnoreUnused(q);
+ _mapping = nullptr;
+ }
+ void* m_HostMemPtr = nullptr;
+ };
+
+ std::shared_ptr<ClBackendCustomAllocatorWrapper> m_CustomAllocator;
+ bool m_UsingCustomAllocator = false;
};
} // namespace armnn
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index 3fca7cb127..69cd4a6d81 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -140,10 +140,16 @@ public:
private:
bool ClImport(const cl_import_properties_arm* importProperties, void* memory)
{
- const size_t totalBytes = m_Tensor.info()->total_size();
+ size_t totalBytes = m_Tensor.info()->total_size();
+
+ // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+ auto cachelineAlignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+
cl_int error = CL_SUCCESS;
cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
- CL_MEM_READ_WRITE, importProperties, memory, totalBytes, &error);
+ CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
if (error != CL_SUCCESS)
{
throw MemoryImportException("ClImportTensorHandle::Invalid imported memory" + std::to_string(error));
diff --git a/src/backends/cl/ClRegistryInitializer.cpp b/src/backends/cl/ClRegistryInitializer.cpp
index 8decd6f689..aadc14bd68 100644
--- a/src/backends/cl/ClRegistryInitializer.cpp
+++ b/src/backends/cl/ClRegistryInitializer.cpp
@@ -18,6 +18,14 @@ static BackendRegistry::StaticRegistryInitializer g_RegisterHelper
ClBackend::GetIdStatic(),
[]()
{
+ // Check if we have a CustomMemoryAllocator associated with the backend
+ // and if so register it with the backend.
+ auto customAllocators = BackendRegistryInstance().GetAllocators();
+ auto allocatorIterator = customAllocators.find(ClBackend::GetIdStatic());
+ if (allocatorIterator != customAllocators.end())
+ {
+ return IBackendInternalUniquePtr(new ClBackend(allocatorIterator->second));
+ }
return IBackendInternalUniquePtr(new ClBackend);
}
};
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
index 6662a1e659..41cbe24c15 100644
--- a/src/backends/cl/test/CMakeLists.txt
+++ b/src/backends/cl/test/CMakeLists.txt
@@ -6,6 +6,7 @@
list(APPEND armnnClBackendUnitTests_sources
ClContextControlFixture.hpp
ClContextSerializerTests.cpp
+ ClCustomAllocatorTests.cpp
ClCreateWorkloadTests.cpp
ClEndToEndTests.cpp
ClImportTensorHandleFactoryTests.cpp
diff --git a/src/backends/cl/test/ClCustomAllocatorTests.cpp b/src/backends/cl/test/ClCustomAllocatorTests.cpp
new file mode 100644
index 0000000000..4d1a0e1cfb
--- /dev/null
+++ b/src/backends/cl/test/ClCustomAllocatorTests.cpp
@@ -0,0 +1,160 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/backends/ICustomAllocator.hpp>
+#include <armnn/Descriptors.hpp>
+#include <armnn/Exceptions.hpp>
+#include <armnn/INetwork.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/Utils.hpp>
+#include <armnn/BackendRegistry.hpp>
+#include <cl/ClBackend.hpp>
+
+#include <doctest/doctest.h>
+
+// Contains the OpenCl interfaces for mapping memory in the Gpu Page Tables
+// Requires the OpenCl backend to be included (GpuAcc)
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <CL/cl_ext.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+
+/** Sample implementation of ICustomAllocator for use with the ClBackend.
+ * Note: any memory allocated must be host accessible with write access to allow for weights and biases
+ * to be passed in. Read access is not required. */
+class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
+{
+public:
+ SampleClBackendCustomAllocator() = default;
+
+ void* allocate(size_t size, size_t alignment)
+ {
+ // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
+ if (alignment == 0)
+ {
+ alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ }
+ size_t space = size + alignment + alignment;
+ auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
+
+ if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
+ {
+ throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
+ }
+ return allocatedMemPtr;
+ }
+
+ /** Interface to be implemented by the child class to free the allocated tensor */
+ void free(void* ptr)
+ {
+ std::free(ptr);
+ }
+
+ armnn::MemorySource GetMemorySourceType()
+ {
+ return armnn::MemorySource::Malloc;
+ }
+};
+
+TEST_SUITE("ClCustomAllocatorTests")
+{
+
+// This is a copy of the SimpleSample app modified to use a custom
+// allocator for the clbackend. It creates a FullyConnected network with a single layer
+// taking a single number as an input
+TEST_CASE("ClCustomAllocatorTest")
+{
+ using namespace armnn;
+
+ float number = 3;
+
+ // Construct ArmNN network
+ armnn::NetworkId networkIdentifier;
+ INetworkPtr myNetwork = INetwork::Create();
+
+ armnn::FullyConnectedDescriptor fullyConnectedDesc;
+ float weightsData[] = {1.0f}; // Identity
+ TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32);
+ weightsInfo.SetConstant(true);
+ armnn::ConstTensor weights(weightsInfo, weightsData);
+
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
+ IConnectableLayer* fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
+ weights,
+ EmptyOptional(),
+ "fully connected");
+ ARMNN_NO_DEPRECATE_WARN_END
+ IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
+ IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
+ InputLayer->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0));
+ fullyConnected->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
+
+
+ // Create ArmNN runtime
+ IRuntime::CreationOptions options; // default options
+ auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
+ options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
+ IRuntimePtr run = IRuntime::Create(options);
+
+ //Set the tensors in the network.
+ TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+
+ TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+ fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // Optimise ArmNN network
+ OptimizerOptions optOptions;
+ optOptions.m_ImportEnabled = true;
+ armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
+ CHECK(optNet);
+
+ // Load graph into runtime
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
+ run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+ // Creates structures for input & output
+ unsigned int numElements = inputTensorInfo.GetNumElements();
+ size_t totalBytes = numElements * sizeof(float);
+
+ const size_t alignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+
+ void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
+
+ // Fill the input buffer with the test value
+ auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
+ std::fill_n(inputPtr, numElements, number);
+
+ void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
+ auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
+ std::fill_n(outputPtr, numElements, -10.0f);
+
+ armnn::InputTensors inputTensors
+ {
+ {0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)},
+ };
+ armnn::OutputTensors outputTensors
+ {
+ {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
+ };
+
+ // Execute network
+ run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+ run->UnloadNetwork(networkIdentifier);
+
+
+ // Tell the CLBackend to sync memory so we can read the output.
+ arm_compute::CLScheduler::get().sync();
+ auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
+
+ run->UnloadNetwork(networkIdentifier);
+ CHECK(outputResult[0] == number);
+ auto& backendRegistry = armnn::BackendRegistryInstance();
+ backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
+}
+
+} // test suite ClCustomAllocatorTests \ No newline at end of file
diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp
index 931729a736..6b1d3521d5 100644
--- a/src/backends/cl/test/ClImportTensorHandleTests.cpp
+++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp
@@ -61,7 +61,7 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClMallocImport")
// Validate result by checking that the output has no negative values
for(unsigned int i = 0; i < numElements; ++i)
{
- CHECK(typedPtr[i] >= 0);
+ CHECK(typedPtr[i] == 0);
}
}