15 #if defined(ARMCOMPUTENEON_ENABLED) 18 #include <doctest/doctest.h> 22 #include <arm_compute/core/CL/CLKernelLibrary.h> 23 #include <CL/cl_ext.h> 24 #include <arm_compute/runtime/CL/CLScheduler.h> 32 SampleClBackendCustomAllocator() =
default;
34 void*
allocate(
size_t size,
size_t alignment)
override 39 alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
41 size_t space = size + alignment + alignment;
42 auto allocatedMemPtr = std::malloc(space *
sizeof(
size_t));
43 if (std::align(alignment, size, allocatedMemPtr, space) ==
nullptr)
47 return allocatedMemPtr;
51 void free(
void* ptr)
override 64 using namespace armnn;
67 float weightsData[] = {1.0f};
76 network->AddFullyConnectedLayer(fullyConnectedDesc,
"fully connected");
101 TEST_CASE(
"ClCustomAllocatorTest")
103 using namespace armnn;
116 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
127 std::string ignoredErrorMessage;
128 INetworkProperties networkProperties(
false, MemorySource::Malloc, MemorySource::Malloc);
129 run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
133 size_t totalBytes = numElements *
sizeof(float);
135 const size_t alignment =
136 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
138 void* alignedInputPtr = options.
m_CustomAllocatorMap[
"GpuAcc"]->allocate(totalBytes, alignment);
141 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
142 std::fill_n(inputPtr, numElements, number);
144 void* alignedOutputPtr = options.
m_CustomAllocatorMap[
"GpuAcc"]->allocate(totalBytes, alignment);
145 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
146 std::fill_n(outputPtr, numElements, -10.0f);
148 armnn::TensorInfo inputTensorInfo2 = run->GetInputTensorInfo(networkIdentifier, 0);
156 {0,
armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
160 run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
161 run->UnloadNetwork(networkIdentifier);
165 arm_compute::CLScheduler::get().sync();
166 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
168 run->UnloadNetwork(networkIdentifier);
169 CHECK(outputResult[0] == number);
171 backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
175 #if defined(ARMCOMPUTENEON_ENABLED) 177 TEST_CASE(
"ClCustomAllocatorCpuAccNegativeTest")
179 using namespace armnn;
183 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
193 std::vector<std::string> errMessages;
195 CHECK_THROWS_AS_MESSAGE(
Optimize(*myNetwork, {
"CpuAcc"}, run->GetDeviceSpec(), optOptions, errMessages),
197 "Expected an exception as GetAvailablePreferredBackends() should be empty in Optimize().");
200 backendRegistry.DeregisterAllocator(NeonBackend::GetIdStatic());
205 TEST_CASE(
"ClCustomAllocatorGpuAccNullptrTest")
207 using namespace armnn;
211 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
214 CHECK_THROWS_AS_MESSAGE(
IRuntimePtr run = IRuntime::Create(options),
216 "Expected exception in RuntimeImpl::RuntimeImpl() as allocator was nullptr.");
Custom Allocator interface.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
virtual void free(void *ptr)=0
Interface to be implemented by the child class to free the allocated bytes.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2021 ARM Limited and Contributors.
TEST_SUITE("ClCustomAllocatorTests")
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A FullyConnectedDescriptor for the FullyConnectedLayer.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
virtual armnn::MemorySource GetMemorySourceType()=0
Used to specify what type of memory is being allocated by this allocator.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
Base class for all ArmNN exceptions so that users can filter to just those.
MemorySource
Define the Memory Source to reduce copies.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo &inputTensorInfo)
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
static INetworkPtr Create(NetworkOptions networkOptions={})
virtual void * allocate(size_t size, size_t alignment)=0
Interface to be implemented by the child class to allocate bytes.
unsigned int GetNumElements() const