25 static_cast<MemorySourceFlags>(MemorySource::Malloc));
28 unsigned int numElements =
info.GetNumElements();
31 auto handle = handleFactory.CreateTensorHandle(info);
34 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
37 const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
38 arm_compute::CLActivationLayer act_func;
39 act_func.configure(&tensor,
nullptr, act_info);
42 const size_t totalBytes = tensor.info()->total_size();
43 const size_t alignment =
44 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
45 size_t space = totalBytes + alignment + alignment;
46 auto testData = std::make_unique<uint8_t[]>(space);
47 void* alignedPtr = testData.get();
48 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
54 auto* typedPtr =
reinterpret_cast<float*
>(alignedPtr);
55 std::fill_n(typedPtr, numElements, -5.0f);
59 arm_compute::CLScheduler::get().sync();
62 for(
unsigned int i = 0; i < numElements; ++i)
64 CHECK(typedPtr[i] == 0);
71 static_cast<MemorySourceFlags>(MemorySource::Malloc));
76 auto handle = handleFactory.CreateTensorHandle(info);
79 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
82 const size_t totalBytes = tensor.info()->total_size();
83 const size_t alignment =
84 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
85 size_t space = totalBytes + alignment + alignment;
86 auto testData = std::make_unique<uint8_t[]>(space);
87 void* alignedPtr = testData.get();
88 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
98 static_cast<MemorySourceFlags>(invalidMemSource));
103 auto handle = handleFactory.CreateTensorHandle(info);
106 std::vector<float> inputData
108 1.0f, 2.0f, 3.0f, 4.0f
127 descriptor.
m_Function = ActivationFunction::ReLu;
128 IConnectableLayer* activation = net->AddActivationLayer(descriptor,
"Activation");
137 size_t totalBytes = numElements *
sizeof(float);
151 std::string ignoredErrorMessage;
153 INetworkProperties networkProperties(
false, MemorySource::Malloc, MemorySource::Malloc);
154 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
157 const size_t alignment =
158 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
159 size_t space = totalBytes + alignment + alignment;
160 auto inputData = std::make_unique<uint8_t[]>(space);
161 void* alignedInputPtr = inputData.get();
162 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
165 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
166 std::fill_n(intputPtr, numElements, -5.0f);
168 auto outputData = std::make_unique<uint8_t[]>(space);
169 void* alignedOutputPtr = outputData.get();
170 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
171 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
172 std::fill_n(outputPtr, numElements, -10.0f);
174 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
182 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
185 runtime->GetProfiler(netId)->EnableProfiling(
true);
188 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
192 std::stringstream ss;
194 std::string dump = ss.str();
197 std::size_t found = dump.find(
"ActivationWorkload");
198 CHECK(found != std::string::npos);
201 found = dump.find(
"SyncMemGeneric");
202 CHECK(found != std::string::npos);
205 found = dump.find(
"CopyMemGeneric");
206 CHECK(found == std::string::npos);
208 runtime->UnloadNetwork(netId);
212 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
214 for(
unsigned int i = 0; i < numElements; ++i)
216 CHECK(outputResult[i] >= 0);
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
static ProfilerManager & GetInstance()
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
GPU Execution: OpenCL: ArmCompute.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
An ActivationDescriptor for the ActivationLayer.
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
MemorySource
Define the Memory Source to reduce copies.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
unsigned int GetNumElements() const