15 #if defined(ARMCOMPUTENEON_ENABLED) 18 #include <doctest/doctest.h> 22 #include <arm_compute/core/CL/CLKernelLibrary.h> 23 #include <CL/cl_ext.h> 24 #include <arm_compute/runtime/CL/CLScheduler.h> 32 SampleClBackendCustomAllocator() =
default;
34 void*
allocate(
size_t size,
size_t alignment)
override 39 alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
41 size_t space = size + alignment + alignment;
42 auto allocatedMemPtr = std::malloc(space *
sizeof(
size_t));
43 if (std::align(alignment, size, allocatedMemPtr, space) ==
nullptr)
47 return allocatedMemPtr;
51 void free(
void* ptr)
override 64 using namespace armnn;
67 float weightsData[] = {1.0f};
76 network->AddFullyConnectedLayer(fullyConnectedDesc,
"fully connected");
101 TEST_CASE(
"ClCustomAllocatorTest")
103 using namespace armnn;
116 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
128 std::string ignoredErrorMessage;
129 INetworkProperties networkProperties(
false, MemorySource::Malloc, MemorySource::Malloc);
130 run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
134 size_t totalBytes = numElements *
sizeof(float);
136 const size_t alignment =
137 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
139 void* alignedInputPtr = options.
m_CustomAllocatorMap[
"GpuAcc"]->allocate(totalBytes, alignment);
142 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
143 std::fill_n(inputPtr, numElements, number);
145 void* alignedOutputPtr = options.
m_CustomAllocatorMap[
"GpuAcc"]->allocate(totalBytes, alignment);
146 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
147 std::fill_n(outputPtr, numElements, -10.0f);
149 armnn::TensorInfo inputTensorInfo2 = run->GetInputTensorInfo(networkIdentifier, 0);
157 {0,
armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
161 run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
162 run->UnloadNetwork(networkIdentifier);
166 arm_compute::CLScheduler::get().sync();
167 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
169 run->UnloadNetwork(networkIdentifier);
170 CHECK(outputResult[0] == number);
172 backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
// Negative test (only meaningful when the Neon backend is built in): mapping
// the custom allocator to CpuAcc must make Optimize() fail, because CpuAcc
// does not support custom allocators. NOTE(review): reconstructed from a
// garbled listing; the closing #endif of this guard was on a missing line.
#if defined(ARMCOMPUTENEON_ENABLED)
TEST_CASE("ClCustomAllocatorCpuAccNegativeTest")
{
    using namespace armnn;

    // Create the runtime with the allocator (wrongly) mapped to CpuAcc.
    IRuntime::CreationOptions options;
    auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
    options.m_CustomAllocatorMap = {{"CpuAcc", std::move(customAllocator)}};
    IRuntimePtr run = IRuntime::Create(options);

    TensorInfo inputTensorInfo;
    INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);

    OptimizerOptions optOptions;
    std::vector<std::string> errMessages;

    // No preferred backend is available, so Optimize() must throw.
    // NOTE(review): exception type was not visible in the listing -- confirm.
    CHECK_THROWS_AS_MESSAGE(Optimize(*myNetwork, {"CpuAcc"}, run->GetDeviceSpec(), optOptions, errMessages),
                            armnn::InvalidArgumentException,
                            "Expected an exception as GetAvailablePreferredBackends() should be empty in Optimize().");

    auto& backendRegistry = armnn::BackendRegistryInstance();
    backendRegistry.DeregisterAllocator(NeonBackend::GetIdStatic());
}
#endif
206 TEST_CASE(
"ClCustomAllocatorGpuAccNullptrTest")
208 using namespace armnn;
212 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
215 CHECK_THROWS_AS_MESSAGE(
IRuntimePtr run = IRuntime::Create(options),
217 "Expected exception in RuntimeImpl::RuntimeImpl() as allocator was nullptr.");
Custom Allocator interface.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
virtual void free(void *ptr)=0
Interface to be implemented by the child class to free the allocated bytes.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2021 ARM Limited and Contributors.
TEST_SUITE("ClCustomAllocatorTests")
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A FullyConnectedDescriptor for the FullyConnectedLayer.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
virtual armnn::MemorySource GetMemorySourceType()=0
Used to specify what type of memory is being allocated by this allocator.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
Base class for all ArmNN exceptions so that users can filter to just those.
MemorySource
Define the Memory Source to reduce copies.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo &inputTensorInfo)
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
static INetworkPtr Create(NetworkOptions networkOptions={})
virtual void * allocate(size_t size, size_t alignment)=0
Interface to be implemented by the child class to allocate bytes.
unsigned int GetNumElements() const