ArmNN
 22.02
ClImportTensorHandleTests.cpp File Reference
#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
#include <cl/ClImportTensorHandle.hpp>
#include <cl/ClImportTensorHandleFactory.hpp>
#include <cl/test/ClContextControlFixture.hpp>
#include <doctest/doctest.h>
#include <armnn/IRuntime.hpp>
#include <armnn/INetwork.hpp>

Go to the source code of this file.

Functions

 TEST_SUITE ("ClImportTensorHandleTests")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "ClImportTensorHandleTests"  )

Definition at line 20 of file ClImportTensorHandleTests.cpp.

References ARMNN_ASSERT, IOutputSlot::Connect(), IRuntime::Create(), INetwork::Create(), ClImportTensorHandleFactory::CreateTensorHandle(), armnn::Float32, IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), TensorInfo::GetNumElements(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), armnn::GpuAcc, armnn::info, Convolution2dDescriptor::m_DataLayout, ActivationDescriptor::m_Function, OptimizerOptions::m_ImportEnabled, Convolution2dDescriptor::m_PadBottom, Convolution2dDescriptor::m_PadLeft, Convolution2dDescriptor::m_PadRight, Convolution2dDescriptor::m_PadTop, Convolution2dDescriptor::m_StrideX, Convolution2dDescriptor::m_StrideY, armnn::Malloc, armnn::NHWC, armnn::Optimize(), IProfiler::Print(), armnn::ReLu, TensorInfo::SetConstant(), IOutputSlot::SetTensorInfo(), TEST_CASE_FIXTURE(), and armnn::Undefined.

// NOTE(review): extracted Doxygen listing - "21 {" opens the TEST_SUITE body and
// "23 {" opens the first test case; the TEST_CASE_FIXTURE header line (original
// line 22) naming this first test is missing from this extraction - confirm
// against the full source file before editing.
21 {
// Test body: imports a cache-line-aligned, user-allocated buffer into a CL
// tensor, runs a RELU activation in place on it, then checks every negative
// input element was clamped to 0.
23 {
24  ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
25  static_cast<MemorySourceFlags>(MemorySource::Malloc));
26 
27  TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
28  unsigned int numElements = info.GetNumElements();
29 
30  // create TensorHandle for memory import
31  auto handle = handleFactory.CreateTensorHandle(info);
32 
33  // Get CLtensor
34  arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
35 
36  // Create and configure activation function
// The activation runs in place: output pointer is nullptr, so results land in
// the same imported buffer that is filled below.
37  const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
38  arm_compute::CLActivationLayer act_func;
39  act_func.configure(&tensor, nullptr, act_info);
40 
41  // Allocate user memory
// Over-allocate by two cache lines so std::align can always find a
// cache-line-aligned window of totalBytes inside the buffer.
42  const size_t totalBytes = tensor.info()->total_size();
43  const size_t alignment =
44  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
45  size_t space = totalBytes + alignment + alignment;
46  auto testData = std::make_unique<uint8_t[]>(space);
47  void* alignedPtr = testData.get();
48  CHECK(std::align(alignment, totalBytes, alignedPtr, space));
49 
50  // Import memory
51  CHECK(handle->Import(alignedPtr, armnn::MemorySource::Malloc));
52 
53  // Input with negative values
54  auto* typedPtr = reinterpret_cast<float*>(alignedPtr);
55  std::fill_n(typedPtr, numElements, -5.0f);
56 
57  // Execute function and sync
58  act_func.run();
59  arm_compute::CLScheduler::get().sync();
60 
61  // Validate result by checking that the output has no negative values
62  for(unsigned int i = 0; i < numElements; ++i)
63  {
64  CHECK(typedPtr[i] == 0);
65  }
66 }
67 
68 TEST_CASE_FIXTURE(ClContextControlFixture, "ClIncorrectMemorySourceImport")
69 {
70  ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
71  static_cast<MemorySourceFlags>(MemorySource::Malloc));
72 
73  TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
74 
75  // create TensorHandle for memory import
76  auto handle = handleFactory.CreateTensorHandle(info);
77 
78  // Get CLtensor
79  arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
80 
81  // Allocate user memory
82  const size_t totalBytes = tensor.info()->total_size();
83  const size_t alignment =
84  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
85  size_t space = totalBytes + alignment + alignment;
86  auto testData = std::make_unique<uint8_t[]>(space);
87  void* alignedPtr = testData.get();
88  CHECK(std::align(alignment, totalBytes, alignedPtr, space));
89 
90  // Import memory
91  CHECK_THROWS_AS(handle->Import(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
92 }
93 
94 TEST_CASE_FIXTURE(ClContextControlFixture, "ClInvalidMemorySourceImport")
95 {
96  MemorySource invalidMemSource = static_cast<MemorySource>(256);
97  ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(invalidMemSource),
98  static_cast<MemorySourceFlags>(invalidMemSource));
99 
100  TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32);
101 
102  // create TensorHandle for memory import
103  auto handle = handleFactory.CreateTensorHandle(info);
104 
105  // Allocate user memory
106  std::vector<float> inputData
107  {
108  1.0f, 2.0f, 3.0f, 4.0f
109  };
110 
111  // Import non-support memory
112  CHECK_THROWS_AS(handle->Import(inputData.data(), invalidMemSource), MemoryImportException);
113 }
114 
115 TEST_CASE_FIXTURE(ClContextControlFixture, "ClImportEndToEnd")
116 {
117  // Create runtime in which test will run
119  IRuntimePtr runtime(armnn::IRuntime::Create(options));
120 
121  // build up the structure of the network
122  INetworkPtr net(INetwork::Create());
123 
124  IConnectableLayer* input = net->AddInputLayer(0, "Input");
125 
126  ActivationDescriptor descriptor;
127  descriptor.m_Function = ActivationFunction::ReLu;
128  IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation");
129 
130  IConnectableLayer* output = net->AddOutputLayer(0, "Output");
131 
132  input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
133  activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
134 
135  TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32);
136  unsigned int numElements = tensorInfo.GetNumElements();
137  size_t totalBytes = numElements * sizeof(float);
138 
139  input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
140  activation->GetOutputSlot(0).SetTensorInfo(tensorInfo);
141 
142  // Optimize the network
143  OptimizerOptions optOptions;
144  optOptions.m_ImportEnabled = true;
145  std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
146  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
147  CHECK(optNet);
148 
149  // Loads it into the runtime.
150  NetworkId netId;
151  std::string ignoredErrorMessage;
152  // Enable Importing
153  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
154  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
155 
156  // Creates structures for input & output
157  const size_t alignment =
158  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
159  size_t space = totalBytes + alignment + alignment;
160  auto inputData = std::make_unique<uint8_t[]>(space);
161  void* alignedInputPtr = inputData.get();
162  CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
163 
164  // Input with negative values
165  auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
166  std::fill_n(intputPtr, numElements, -5.0f);
167 
168  auto outputData = std::make_unique<uint8_t[]>(space);
169  void* alignedOutputPtr = outputData.get();
170  CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
171  auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
172  std::fill_n(outputPtr, numElements, -10.0f);
173 
174  TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
175  inputTensorInfo.SetConstant(true);
176  InputTensors inputTensors
177  {
178  {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
179  };
180  OutputTensors outputTensors
181  {
182  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
183  };
184 
185  runtime->GetProfiler(netId)->EnableProfiling(true);
186 
187  // Do the inference
188  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
189 
190  // Retrieve the Profiler.Print() output to get the workload execution
192  std::stringstream ss;
193  profilerManager.GetProfiler()->Print(ss);;
194  std::string dump = ss.str();
195 
196  // Contains ActivationWorkload
197  std::size_t found = dump.find("ActivationWorkload");
198  CHECK(found != std::string::npos);
199 
200  // Contains SyncMemGeneric
201  found = dump.find("SyncMemGeneric");
202  CHECK(found != std::string::npos);
203 
204  // Does not contain CopyMemGeneric
205  found = dump.find("CopyMemGeneric");
206  CHECK(found == std::string::npos);
207 
208  runtime->UnloadNetwork(netId);
209 
210  // Check output is as expected
211  // Validate result by checking that the output has no negative values
212  auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
213  CHECK(outputResult);
214  for(unsigned int i = 0; i < numElements; ++i)
215  {
216  CHECK(outputResult[i] >= 0);
217  }
218 }
219 
220 TEST_CASE_FIXTURE(ClContextControlFixture, "ClCanBeImported")
221 {
222  ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
223  static_cast<MemorySourceFlags>(MemorySource::Malloc));
224 
225  TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
226 
227  // create TensorHandle for memory import
228  auto handle = handleFactory.CreateTensorHandle(info);
229 
230  // Get CLtensor
231  arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
232 
233  // Allocate user memory
234  const size_t totalBytes = tensor.info()->total_size();
235  const size_t alignment =
236  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
237  size_t space = totalBytes + alignment + alignment;
238  auto testData = std::make_unique<uint8_t[]>(space);
239  void* alignedPtr = testData.get();
240  CHECK(std::align(alignment, totalBytes, alignedPtr, space));
241 
242  // Import memory
243  CHECK_THROWS_AS(handle->CanBeImported(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
244 
245 }
246 
247 TEST_CASE("ClCanBeImportedAlignedMemory")
248 {
249  ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
250  static_cast<MemorySourceFlags>(MemorySource::Malloc));
251 
252  TensorInfo info({ 1, 1, 1, 1 }, DataType::Float32);
253 
254  // create TensorHandle (Memory Managed status is irrelevant)
255  auto handle = handleFactory.CreateTensorHandle(info);
256  // Get CLtensor
257  arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
258 
259  // Create an aligned buffer
260  const size_t totalBytes = tensor.info()->total_size();
261  const size_t alignment =
262  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
263  size_t space = totalBytes + alignment + alignment;
264  auto testData = std::make_unique<uint8_t[]>(space);
265  void* alignedPtr = testData.get();
266  CHECK(std::align(alignment, totalBytes, alignedPtr, space));
267 
268  // Check aligned buffers return true
269  CHECK(handle->CanBeImported(alignedPtr, MemorySource::Malloc) == true);
270 
271  // Due to the nature of how GPU memory is mapped it is entirely possible for memory which is misaligned on cpu
272  // to be successfully import on GPU. As such there is no way to create a misaligned pointer that will always fail.
273  // Rather it will succeed on some devices and fail on others. As long as a correctly aligned buffer returns true
274  // we can be confident that it will be successfully imported. All other cases will need to be handled by the user.
275 }
276 
277 TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConv2dEndToEnd")
278 {
279  // Create runtime in which test will run
281  IRuntimePtr runtime(armnn::IRuntime::Create(options));
282 
283  // build up the structure of the network
284  INetworkPtr network(INetwork::Create());
285 
286  armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
287  armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
288  armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);
289 
290  kernelInfo.SetConstant(true);
291 
292  std::vector<float> kernel =
293  {
294  4, 5, 6,
295  0, 0, 0,
296  3, 2, 1
297  };
298 
299  const std::vector<float> expectedOutput =
300  {
301  23, 41, 33, 21,
302  44, 65, 76, 52,
303  82, 85, 79, 42
304  };
305 
306  unsigned int numElements = inputInfo.GetNumElements();
307  size_t totalBytes = numElements * sizeof(float);
308 
309  IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
310  ARMNN_ASSERT(inputLayer);
311 
312  armnn::ConstTensor weights(kernelInfo, kernel);
313 
315  convDesc2d.m_StrideX = 1;
316  convDesc2d.m_StrideY = 1;
317  convDesc2d.m_PadLeft = 1;
318  convDesc2d.m_PadRight = 1;
319  convDesc2d.m_PadTop = 1;
320  convDesc2d.m_PadBottom = 1;
321  convDesc2d.m_DataLayout = DataLayout::NHWC;
322  armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d,
323  weights,
325  "conv");
326  ARMNN_ASSERT(convLayer);
327 
328  inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
329  inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
330 
331  IConnectableLayer* output = network->AddOutputLayer(0, "output");
332  convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
333  convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
334 
335  // Optimize the network
336  OptimizerOptions optOptions;
337  optOptions.m_ImportEnabled = false;
338  std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
339  IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
340  CHECK(optNet);
341 
342  // Loads it into the runtime.
343  NetworkId netId;
344  std::string ignoredErrorMessage;
345  // Enable Importing
346  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
347  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
348 
349  // Creates structures for input & output
350  const size_t alignment =
351  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
352  size_t space = totalBytes + alignment + alignment;
353  auto inputData = std::make_unique<uint8_t[]>(space);
354  void* alignedInputPtr = inputData.get();
355  CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
356 
357  // Input with negative values
358  auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
359  inputPtr[0] = 1;
360  inputPtr[1] = 5;
361  inputPtr[2] = 2;
362  inputPtr[3] = 3;
363  inputPtr[4] = 8;
364  inputPtr[5] = 7;
365  inputPtr[6] = 3;
366  inputPtr[7] = 6;
367  inputPtr[8] = 3;
368  inputPtr[9] = 3;
369  inputPtr[10] = 9;
370  inputPtr[11] = 1;
371 
372 
373  auto outputData = std::make_unique<uint8_t[]>(space);
374  void* alignedOutputPtr = outputData.get();
375  CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
376  auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
377  std::fill_n(outputPtr, numElements, -10.0f);
378 
379  TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
380  inputTensorInfo.SetConstant(true);
381  InputTensors inputTensors
382  {
383  {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
384  };
385  OutputTensors outputTensors
386  {
387  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
388  };
389 
390  runtime->GetProfiler(netId)->EnableProfiling(true);
391 
392  INFO("Run ImportInputs");
393  std::vector<ImportedInputId> importedInputIds =
394  runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
395  std::vector<ImportedOutputId> importedOutputIds =
396  runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
397 
398  // Do the inference
399  runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
400 
401  // Retrieve the Profiler.Print() output to get the workload execution
403  std::stringstream ss;
404  profilerManager.GetProfiler()->Print(ss);;
405  std::string dump = ss.str();
406 
407  // Contains Convolution2dWorkload
408  std::size_t found = dump.find("Convolution2dWorkload");
409  CHECK(found != std::string::npos);
410 
411  // Contains SyncMemGeneric
412  found = dump.find("SyncMemGeneric");
413  CHECK(found != std::string::npos);
414 
415  // Does not contain CopyMemGeneric
416  found = dump.find("CopyMemGeneric");
417  CHECK(found == std::string::npos);
418 
419  runtime->UnloadNetwork(netId);
420 
421  // Check output is as expected
422  // Validate result by checking that the output has no negative values
423  auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
424  CHECK(outputResult);
425 
426  // Check the output is correct
427  CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
428 }
429 
430 }
uint32_t m_PadBottom
Padding bottom value in the height dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:40
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:66
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:568
A Convolution2dDescriptor for the Convolution2dLayer.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:31
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
Definition: Profiling.cpp:605
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
uint32_t m_PadRight
Padding right value in the width dimension.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
IProfiler * GetProfiler()
Definition: Profiling.cpp:580
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1680
int NetworkId
Definition: IRuntime.hpp:25
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
GPU Execution: OpenCL: ArmCompute.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
Definition: INetwork.hpp:137
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
EmptyOptional is used to initialize the Optional class in case we want to have a default value for an Optional.
Definition: Optional.hpp:32
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:217
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:516
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:241
virtual int Connect(IInputSlot &destination)=0
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
Definition: Descriptors.hpp:59
uint32_t m_PadLeft
Padding left value in the width dimension.
unsigned int GetNumElements() const
Definition: Tensor.hpp:196