25 static_cast<MemorySourceFlags>(MemorySource::Malloc));
28 unsigned int numElements =
info.GetNumElements();
31 auto handle = handleFactory.CreateTensorHandle(info);
34 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
37 const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
38 arm_compute::CLActivationLayer act_func;
39 act_func.configure(&tensor,
nullptr, act_info);
42 const size_t totalBytes = tensor.info()->total_size();
43 const size_t alignment =
44 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
45 size_t space = totalBytes + alignment + alignment;
46 auto testData = std::make_unique<uint8_t[]>(space);
47 void* alignedPtr = testData.get();
48 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
54 auto* typedPtr =
reinterpret_cast<float*
>(alignedPtr);
55 std::fill_n(typedPtr, numElements, -5.0f);
59 arm_compute::CLScheduler::get().sync();
62 for(
unsigned int i = 0; i < numElements; ++i)
64 CHECK(typedPtr[i] == 0);
71 static_cast<MemorySourceFlags>(MemorySource::Malloc));
76 auto handle = handleFactory.CreateTensorHandle(info);
79 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
82 const size_t totalBytes = tensor.info()->total_size();
83 const size_t alignment =
84 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
85 size_t space = totalBytes + alignment + alignment;
86 auto testData = std::make_unique<uint8_t[]>(space);
87 void* alignedPtr = testData.get();
88 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
98 static_cast<MemorySourceFlags>(invalidMemSource));
103 auto handle = handleFactory.CreateTensorHandle(info);
106 std::vector<float> inputData
108 1.0f, 2.0f, 3.0f, 4.0f
127 descriptor.
m_Function = ActivationFunction::ReLu;
128 IConnectableLayer* activation = net->AddActivationLayer(descriptor,
"Activation");
137 size_t totalBytes = numElements *
sizeof(float);
151 std::string ignoredErrorMessage;
153 INetworkProperties networkProperties(
false, MemorySource::Malloc, MemorySource::Malloc);
154 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
157 const size_t alignment =
158 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
159 size_t space = totalBytes + alignment + alignment;
160 auto inputData = std::make_unique<uint8_t[]>(space);
161 void* alignedInputPtr = inputData.get();
162 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
165 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
166 std::fill_n(intputPtr, numElements, -5.0f);
168 auto outputData = std::make_unique<uint8_t[]>(space);
169 void* alignedOutputPtr = outputData.get();
170 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
171 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
172 std::fill_n(outputPtr, numElements, -10.0f);
174 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
182 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
185 runtime->GetProfiler(netId)->EnableProfiling(
true);
188 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
192 std::stringstream ss;
194 std::string dump = ss.str();
197 std::size_t found = dump.find(
"ActivationWorkload");
198 CHECK(found != std::string::npos);
201 found = dump.find(
"SyncMemGeneric");
202 CHECK(found != std::string::npos);
205 found = dump.find(
"CopyMemGeneric");
206 CHECK(found == std::string::npos);
208 runtime->UnloadNetwork(netId);
212 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
214 for(
unsigned int i = 0; i < numElements; ++i)
216 CHECK(outputResult[i] >= 0);
223 static_cast<MemorySourceFlags>(MemorySource::Malloc));
228 auto handle = handleFactory.CreateTensorHandle(info);
231 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
234 const size_t totalBytes = tensor.info()->total_size();
235 const size_t alignment =
236 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
237 size_t space = totalBytes + alignment + alignment;
238 auto testData = std::make_unique<uint8_t[]>(space);
239 void* alignedPtr = testData.get();
240 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
247 TEST_CASE(
"ClCanBeImportedAlignedMemory")
250 static_cast<MemorySourceFlags>(MemorySource::Malloc));
255 auto handle = handleFactory.CreateTensorHandle(info);
257 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
260 const size_t totalBytes = tensor.info()->total_size();
261 const size_t alignment =
262 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
263 size_t space = totalBytes + alignment + alignment;
264 auto testData = std::make_unique<uint8_t[]>(space);
265 void* alignedPtr = testData.get();
266 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
269 CHECK(handle->CanBeImported(alignedPtr, MemorySource::Malloc) ==
true);
292 std::vector<float> kernel =
299 const std::vector<float> expectedOutput =
306 unsigned int numElements = inputInfo.GetNumElements();
307 size_t totalBytes = numElements *
sizeof(float);
328 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
329 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
344 std::string ignoredErrorMessage;
346 INetworkProperties networkProperties(
false, MemorySource::Undefined, MemorySource::Undefined);
347 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
350 const size_t alignment =
351 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
352 size_t space = totalBytes + alignment + alignment;
353 auto inputData = std::make_unique<uint8_t[]>(space);
354 void* alignedInputPtr = inputData.get();
355 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
358 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
373 auto outputData = std::make_unique<uint8_t[]>(space);
374 void* alignedOutputPtr = outputData.get();
375 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
376 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
377 std::fill_n(outputPtr, numElements, -10.0f);
379 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
387 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
390 runtime->GetProfiler(netId)->EnableProfiling(
true);
392 INFO(
"Run ImportInputs");
393 std::vector<ImportedInputId> importedInputIds =
394 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
395 std::vector<ImportedOutputId> importedOutputIds =
396 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
399 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
403 std::stringstream ss;
405 std::string dump = ss.str();
408 std::size_t found = dump.find(
"Convolution2dWorkload");
409 CHECK(found != std::string::npos);
412 found = dump.find(
"SyncMemGeneric");
413 CHECK(found != std::string::npos);
416 found = dump.find(
"CopyMemGeneric");
417 CHECK(found == std::string::npos);
419 runtime->UnloadNetwork(netId);
423 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
427 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
uint32_t m_PadBottom
Padding bottom value in the height dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
static ProfilerManager & GetInstance()
A Convolution2dDescriptor for the Convolution2dLayer.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON format to the given output stream.
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
uint32_t m_PadRight
Padding right value in the width dimension.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
#define ARMNN_ASSERT(COND)
GPU Execution: OpenCL: ArmCompute.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
An ActivationDescriptor for the ActivationLayer.
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
EmptyOptional is used to initialize the Optional class in case we want to have a default value for an Optional.
MemorySource
Define the Memory Source to reduce copies.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
uint32_t m_PadLeft
Padding left value in the width dimension.
unsigned int GetNumElements() const