ArmNN
 22.05
ClCustomAllocatorTests.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
7 #include <armnn/Descriptors.hpp>
8 #include <armnn/Exceptions.hpp>
9 #include <armnn/INetwork.hpp>
10 #include <armnn/IRuntime.hpp>
11 #include <armnn/Utils.hpp>
13 
14 #include <cl/ClBackend.hpp>
15 #if defined(ARMCOMPUTENEON_ENABLED)
16 #include <neon/NeonBackend.hpp>
17 #endif
18 #include <doctest/doctest.h>
20 // Contains the OpenCl interfaces for mapping memory in the Gpu Page Tables
21 // Requires the OpenCl backend to be included (GpuAcc)
22 #include <arm_compute/core/CL/CLKernelLibrary.h>
23 #include <CL/cl_ext.h>
24 #include <arm_compute/runtime/CL/CLScheduler.h>
25 
26 /** Sample implementation of ICustomAllocator for use with the ClBackend.
27  * Note: any memory allocated must be host accessible with write access to allow for weights and biases
28  * to be passed in. Read access is not required. */
29 class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
30 {
31 public:
32  SampleClBackendCustomAllocator() = default;
33 
34  void* allocate(size_t size, size_t alignment) override
35  {
36  // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
37  if (alignment == 0)
38  {
39  alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
40  }
41  size_t space = size + alignment + alignment;
42  auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
43  if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
44  {
45  throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
46  }
47  return allocatedMemPtr;
48  }
49 
50  /** Interface to be implemented by the child class to free the allocated tensor */
51  void free(void* ptr) override
52  {
53  std::free(ptr);
54  }
55 
57  {
59  }
60 };
61 
// Body of the network-building helper used by the test cases below.
// NOTE(review): the line carrying this function's signature and the line that declares
// `network` are not present in this listing; `network` is used and returned below, so
// confirm both against the original file.
// The visible code wires input(0) -> FullyConnected -> output(0), with a constant 1x1
// Float32 weights layer feeding input slot 1 of the FullyConnected layer.
63 {
64  using namespace armnn;
65 
66  armnn::FullyConnectedDescriptor fullyConnectedDesc;
67  float weightsData[] = {1.0f}; // Identity
 // The last constructor argument already marks the info as constant; SetConstant(true) repeats it.
68  TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
69  weightsInfo.SetConstant(true);
70  armnn::ConstTensor weights(weightsInfo, weightsData);
71 
 // Create the four layers: input, constant weights, fully connected, output.
73  armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
74  armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights");
75  armnn::IConnectableLayer* const fullyConnectedLayer =
76  network->AddFullyConnectedLayer(fullyConnectedDesc, "fully connected");
77  armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
78 
 // Data goes to FullyConnected input slot 0, weights to input slot 1.
79  inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
80  weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
81  fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
82 
83  weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
84 
85  //Set the tensors in the network.
86 
 // The input layer's output uses the caller-supplied tensor info.
87  inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
88 
89  TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
90  fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
91 
92  return network;
93 }
94 
95 TEST_SUITE("ClCustomAllocatorTests")
96 {
97 
98 // This is a copy of the SimpleSample app modified to use a custom
99 // allocator for the clbackend. It creates a FullyConnected network with a single layer
100 // taking a single number as an input
101 TEST_CASE("ClCustomAllocatorTest")
102 {
103  using namespace armnn;
104 
105  float number = 3;
106 
107  // Construct ArmNN network
108  armnn::NetworkId networkIdentifier;
109 
110  TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
111 
112  INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);
113 
114  // Create ArmNN runtime
115  IRuntime::CreationOptions options; // default options
116  auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
117  options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
118  IRuntimePtr run = IRuntime::Create(options);
119 
120  // Optimise ArmNN network
121  OptimizerOptions optOptions;
122  optOptions.m_ImportEnabled = true;
123  armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
124  CHECK(optNet);
125 
126  // Load graph into runtime
127  std::string ignoredErrorMessage;
128  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
129  run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
130 
131  // Creates structures for input & output
132  unsigned int numElements = inputTensorInfo.GetNumElements();
133  size_t totalBytes = numElements * sizeof(float);
134 
135  const size_t alignment =
136  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
137 
138  void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
139 
140  // Input with negative values
141  auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
142  std::fill_n(inputPtr, numElements, number);
143 
144  void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
145  auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
146  std::fill_n(outputPtr, numElements, -10.0f);
147 
148  armnn::TensorInfo inputTensorInfo2 = run->GetInputTensorInfo(networkIdentifier, 0);
149  inputTensorInfo2.SetConstant(true);
150  armnn::InputTensors inputTensors
151  {
152  {0, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr)},
153  };
154  armnn::OutputTensors outputTensors
155  {
156  {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
157  };
158 
159  // Execute network
160  run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
161  run->UnloadNetwork(networkIdentifier);
162 
163 
164  // Tell the CLBackend to sync memory so we can read the output.
165  arm_compute::CLScheduler::get().sync();
166  auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
167 
168  run->UnloadNetwork(networkIdentifier);
169  CHECK(outputResult[0] == number);
170  auto& backendRegistry = armnn::BackendRegistryInstance();
171  backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
172 }
173 
174 // Only run this test if NEON is enabled
175 #if defined(ARMCOMPUTENEON_ENABLED)
176 
// Negative test: registers the sample allocator under the "CpuAcc" backend id and expects
// Optimize() for {"CpuAcc"} to throw.
// NOTE(review): CHECK_THROWS_AS_MESSAGE is given only an expression and a message in this
// listing; the exception-type argument appears to be missing - confirm against the original file.
177 TEST_CASE("ClCustomAllocatorCpuAccNegativeTest")
178 {
179  using namespace armnn;
180 
181  // Create ArmNN runtime
182  IRuntime::CreationOptions options; // default options
183  auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
184  options.m_CustomAllocatorMap = {{"CpuAcc", std::move(customAllocator)}};
185  IRuntimePtr run = IRuntime::Create(options);
186  TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
187  INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);
188 
189  // Optimise ArmNN network
190  OptimizerOptions optOptions;
191  optOptions.m_ImportEnabled = true;
 // optNet is declared but not referenced again in the visible lines of this test.
192  IOptimizedNetworkPtr optNet(nullptr, nullptr);
193  std::vector<std::string> errMessages;
194 
 // The Optimize() call itself is the expression expected to throw.
195  CHECK_THROWS_AS_MESSAGE(Optimize(*myNetwork, {"CpuAcc"}, run->GetDeviceSpec(), optOptions, errMessages),
197  "Expected an exception as GetAvailablePreferredBackends() should be empty in Optimize().");
198 
 // Remove the allocator registered under the Neon backend id.
199  auto& backendRegistry = armnn::BackendRegistryInstance();
200  backendRegistry.DeregisterAllocator(NeonBackend::GetIdStatic());
201 }
202 
203 #endif
204 
// Negative test: maps the "GpuAcc" backend id to a null allocator and expects
// IRuntime::Create() to throw.
// NOTE(review): CHECK_THROWS_AS_MESSAGE is given only an expression and a message in this
// listing; the exception-type argument appears to be missing - confirm against the original file.
205 TEST_CASE("ClCustomAllocatorGpuAccNullptrTest")
206 {
207  using namespace armnn;
208 
209  // Create ArmNN runtime
210  IRuntime::CreationOptions options; // default options
 // customAllocator is created but never placed in the map; nullptr is registered instead.
211  auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
212  options.m_CustomAllocatorMap = {{"GpuAcc", nullptr}};
213 
 // Runtime creation with the null allocator entry is the expression expected to throw.
214  CHECK_THROWS_AS_MESSAGE(IRuntimePtr run = IRuntime::Create(options),
216  "Expected exception in RuntimeImpl::RuntimeImpl() as allocator was nullptr.");
217 }
218 
219 } // test suite ClCustomAllocatorTests
Custom Allocator interface.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:66
virtual void free(void *ptr)=0
Interface to be implemented by the child class to free the allocated bytes.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:33
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
Copyright (c) 2021 ARM Limited and Contributors.
TEST_SUITE("ClCustomAllocatorTests")
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1847
A FullyConnectedDescriptor for the FullyConnectedLayer.
int NetworkId
Definition: IRuntime.hpp:27
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
virtual armnn::MemorySource GetMemorySourceType()=0
Used to specify what type of memory is being allocated by this allocator.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
Definition: IRuntime.hpp:114
ArmNN performs an optimization on each model/network before it gets loaded for execution.
Definition: INetwork.hpp:137
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:230
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo &inputTensorInfo)
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:241
virtual int Connect(IInputSlot &destination)=0
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:476
virtual void * allocate(size_t size, size_t alignment)=0
Interface to be implemented by the child class to allocate bytes.
unsigned int GetNumElements() const
Definition: Tensor.hpp:196