ArmNN
 21.11
ClImportTensorHandleTests.cpp File Reference
#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
#include <cl/ClImportTensorHandle.hpp>
#include <cl/ClImportTensorHandleFactory.hpp>
#include <cl/test/ClContextControlFixture.hpp>
#include <doctest/doctest.h>
#include <armnn/IRuntime.hpp>
#include <armnn/INetwork.hpp>


Functions

 TEST_SUITE ("ClImportTensorHandleTests")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "ClImportTensorHandleTests" )

Definition at line 20 of file ClImportTensorHandleTests.cpp.

References IOutputSlot::Connect(), INetwork::Create(), IRuntime::Create(), ClImportTensorHandleFactory::CreateTensorHandle(), armnn::Float32, IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), TensorInfo::GetNumElements(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), armnn::GpuAcc, armnn::info, ActivationDescriptor::m_Function, OptimizerOptions::m_ImportEnabled, armnn::Malloc, armnn::Optimize(), IProfiler::Print(), armnn::ReLu, TensorInfo::SetConstant(), IOutputSlot::SetTensorInfo(), TEST_CASE_FIXTURE(), and armnn::Undefined.
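Taken together, these references amount to a short workflow: construct a ClImportTensorHandleFactory for a given MemorySource, create a tensor handle, and import user-owned memory that meets the device's alignment requirement. Below is a minimal sketch of that workflow; the helper name SketchMallocImport, the 1x2x2x1 shape, and the 64-byte fallback alignment are illustrative, not part of the file.

#include <cl/ClImportTensorHandle.hpp>
#include <cl/ClImportTensorHandleFactory.hpp>
#include <cstddef>
#include <cstdint>
#include <memory>

// Hypothetical helper, for illustration only.
void SketchMallocImport()
{
    using namespace armnn;

    // Factory that accepts imports from Malloc-sourced memory only
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32);
    auto handle = handleFactory.CreateTensorHandle(info);

    // The CL backend requires the imported pointer to meet the device's
    // cache-line alignment, so over-allocate and align before importing.
    const size_t totalBytes = info.GetNumBytes();
    const size_t alignment = 64; // illustrative; the tests below query the OpenCL device instead
    size_t space = totalBytes + alignment + alignment;
    auto buffer = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = buffer.get();
    std::align(alignment, totalBytes, alignedPtr, space);

    // Import succeeds only for a memory source the factory was constructed with
    handle->Import(alignedPtr, MemorySource::Malloc);
}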

{

TEST_CASE_FIXTURE(ClContextControlFixture, "ClMallocImport")
{
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
    unsigned int numElements = info.GetNumElements();

    // Create TensorHandle for memory import
    auto handle = handleFactory.CreateTensorHandle(info);

    // Get the underlying CLTensor
    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();

    // Create and configure the activation function
    const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
    arm_compute::CLActivationLayer act_func;
    act_func.configure(&tensor, nullptr, act_info);

    // Allocate user memory, over-allocated so it can be aligned to the device cache line
    const size_t totalBytes = tensor.info()->total_size();
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto testData = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = testData.get();
    CHECK(std::align(alignment, totalBytes, alignedPtr, space));

    // Import memory
    CHECK(handle->Import(alignedPtr, armnn::MemorySource::Malloc));

    // Fill the input with negative values
    auto* typedPtr = reinterpret_cast<float*>(alignedPtr);
    std::fill_n(typedPtr, numElements, -5.0f);

    // Execute the function and sync
    act_func.run();
    arm_compute::CLScheduler::get().sync();

    // Validate the result: ReLU clamps every negative input to exactly 0
    for (unsigned int i = 0; i < numElements; ++i)
    {
        CHECK(typedPtr[i] == 0);
    }
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClIncorrectMemorySourceImport")
{
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);

    // Create TensorHandle for memory import
    auto handle = handleFactory.CreateTensorHandle(info);

    // Get the underlying CLTensor
    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();

    // Allocate user memory, aligned to the device cache line
    const size_t totalBytes = tensor.info()->total_size();
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto testData = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = testData.get();
    CHECK(std::align(alignment, totalBytes, alignedPtr, space));

    // Importing with a memory source the factory was not built for must throw
    CHECK_THROWS_AS(handle->Import(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClInvalidMemorySourceImport")
{
    MemorySource invalidMemSource = static_cast<MemorySource>(256);
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(invalidMemSource),
                                              static_cast<MemorySourceFlags>(invalidMemSource));

    TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32);

    // Create TensorHandle for memory import
    auto handle = handleFactory.CreateTensorHandle(info);

    // Allocate user memory
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Importing from an unsupported memory source must throw
    CHECK_THROWS_AS(handle->Import(inputData.data(), invalidMemSource), MemoryImportException);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClImportEndToEnd")
{
    // Create runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0, "Input");

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::ReLu;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation");

    IConnectableLayer* output = net->AddOutputLayer(0, "Output");

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32);
    unsigned int numElements = tensorInfo.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);

    input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    activation->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
    CHECK(optNet);

    // Load the network into the runtime with importing enabled
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create aligned structures for input & output
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr = inputData.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));

    // Fill the input with negative values
    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    std::fill_n(inputPtr, numElements, -5.0f);

    auto outputData = std::make_unique<uint8_t[]>(space);
    void* alignedOutputPtr = outputData.get();
    CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
    auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
    std::fill_n(outputPtr, numElements, -10.0f);

    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
    inputTensorInfo.SetConstant(true);
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to inspect the workload execution
    armnn::ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ActivationWorkload
    std::size_t found = dump.find("ActivationWorkload");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    runtime->UnloadNetwork(netId);

    // Check the output is as expected:
    // validate the result by checking that the output has no negative values
    auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
    CHECK(outputResult);
    for (unsigned int i = 0; i < numElements; ++i)
    {
        CHECK(outputResult[i] >= 0);
    }
}

}
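As a usage note, the profiler assertions at the end of ClImportEndToEnd form a reusable pattern for checking that imported buffers really avoided copies. A condensed sketch of just that check, assuming a runtime and loaded network as in the test above:

// Sketch: verify zero-copy execution from the profiler dump.
// Assumes the doctest CHECK macro and <sstream>, as pulled in by this file's test setup.
std::stringstream ss;
armnn::ProfilerManager::GetInstance().GetProfiler()->Print(ss);
const std::string dump = ss.str();

// SyncMemGeneric indicates only cache maintenance on the imported buffers;
// CopyMemGeneric would indicate a fallback to copying.
CHECK(dump.find("SyncMemGeneric") != std::string::npos);
CHECK(dump.find("CopyMemGeneric") == std::string::npos);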