ArmNN 21.05
ClFallbackTests.cpp File Reference
#include <backendsCommon/test/CommonTestUtils.hpp>
#include <test/GraphUtils.hpp>
#include <boost/test/unit_test.hpp>


Functions

 BOOST_AUTO_TEST_CASE (ClImportEnabledFallbackToNeon)
 
 BOOST_AUTO_TEST_CASE (ClImportDisabledFallbackToNeon)
 
 BOOST_AUTO_TEST_CASE (ClImportEnabledFallbackSubgraphToNeon)
 
 BOOST_AUTO_TEST_CASE (ClImportDisableFallbackSubgraphToNeon)
 

Function Documentation

◆ BOOST_AUTO_TEST_CASE() [1/4]

BOOST_AUTO_TEST_CASE ( ClImportEnabledFallbackToNeon  )

Definition at line 14 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), TensorInfo::GetNumElements(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ImportEnabled, armnn::Malloc, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().
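
The import-enabled path only works if the caller's input buffers satisfy the CL backend's alignment requirement, which is why the listing below copies the input values into over-sized byte arrays and aligns them with std::align. A minimal sketch of that allocation pattern, assuming a 64-byte alignment is sufficient for import (the helper name is illustrative, not part of the test):

#include <cstddef>
#include <cstdint>
#include <memory>

// Allocate numFloats floats on a 64-byte boundary; 'storage' keeps the
// backing allocation alive. Mirrors the pattern used in the test body.
float* AllocateAligned(std::unique_ptr<uint8_t[]>& storage, std::size_t numFloats)
{
    const std::size_t alignment  = 64;
    const std::size_t totalBytes = numFloats * sizeof(float);
    std::size_t space = totalBytes + alignment + alignment; // slack for std::align
    storage = std::make_unique<uint8_t[]>(space);
    void* ptr = storage.get();
    std::align(alignment, totalBytes, ptr, space);          // bumps ptr up to the boundary
    return reinterpret_cast<float*>(ptr);                   // nullptr check omitted for brevity
}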

15 {
16  using namespace armnn;
17 
18  IRuntime::CreationOptions options;
19  IRuntimePtr runtime(IRuntime::Create(options));
20 
21  // Builds up the structure of the network.
22  INetworkPtr net(INetwork::Create());
23 
24  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
25  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
26  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
27  IConnectableLayer* add = net->AddAdditionLayer("add");
28  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
29  IConnectableLayer* output = net->AddOutputLayer(0, "output");
30 
31  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
32  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
33  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
34  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
35  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
36 
37  TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
38 
39  input0->GetOutputSlot(0).SetTensorInfo(info);
40  input1->GetOutputSlot(0).SetTensorInfo(info);
41  input2->GetOutputSlot(0).SetTensorInfo(info);
42  add->GetOutputSlot(0).SetTensorInfo(info);
43  sub->GetOutputSlot(0).SetTensorInfo(info);
44 
45  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
46  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
47  sub->BackendSelectionHint(backends[1]);
48 
49  // optimize the network
50  OptimizerOptions optOptions;
51  optOptions.m_ImportEnabled = true;
52  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
53 
54  Graph& graph = GetGraphForTesting(optNet.get());
55 
56  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
57  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
58  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
59  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
60  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
61  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
62  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
63 
64  // Checks order is valid.
65  BOOST_TEST(CheckOrder(graph, layer0, layer1));
66  BOOST_TEST(CheckOrder(graph, layer1, layer2));
67  BOOST_TEST(CheckOrder(graph, layer2, layer3));
68  BOOST_TEST(CheckOrder(graph, layer3, layer4));
69  BOOST_TEST(CheckOrder(graph, layer4, layer5));
70  BOOST_TEST(CheckOrder(graph, layer5, layer6));
71 
72  // Use memory import between backends
73  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
74 
75  // Correctly use backend hint
76  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
77 
78  // Load it into the runtime. It should pass.
79  NetworkId netId;
80  std::string ignoredErrorMessage;
81  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
82  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
83 
84  // Creates structures for input & output
85  std::vector<float> inputValue0
86  {
87  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
88  };
89  std::vector<float> inputValue1
90  {
91  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
92  };
93  std::vector<float> inputData2
94  {
95  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
96  };
97 
98  std::vector<float> outputData(16);
99 
100  std::vector<float> expectedOutput
101  {
102  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
103  };
104 
105  // Prepare aligned data
106  unsigned int numElements = info.GetNumElements();
107  size_t totalBytes = numElements * sizeof(float);
108  const size_t alignment = 64;
109  size_t space = totalBytes + alignment + alignment;
110  auto inputData0 = std::make_unique<uint8_t[]>(space);
111  void* alignedInputPtr0 = inputData0.get();
112  BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
113 
114  auto* inputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
115  std::copy(inputValue0.begin(), inputValue0.end(), inputPtr0);
116 
117  auto inputData1 = std::make_unique<uint8_t[]>(space);
118  void* alignedInputPtr1 = inputData1.get();
119  BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
120 
121  auto* inputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
122  std::copy(inputValue1.begin(), inputValue1.end(), inputPtr1);
123 
124  InputTensors inputTensors
125  {
126  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
127  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
128  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
129  };
130  OutputTensors outputTensors
131  {
132  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
133  };
134 
135  runtime->GetProfiler(netId)->EnableProfiling(true);
136 
137  // Do the inference
138  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
139 
140  // Retrieve the Profiler.Print() output to get the workload execution
141  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
142  std::stringstream ss;
143  profilerManager.GetProfiler()->Print(ss);
144  std::string dump = ss.str();
145 
146  // Executed Subtraction using CpuAcc
147  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
148  BOOST_TEST(found != std::string::npos);
149 
150  // Contain CopyMemGeneric
151  found = dump.find("CopyMemGeneric");
152  BOOST_TEST(found != std::string::npos);
153 
154  // Check output is as expected
155  BOOST_TEST(outputData == expectedOutput);
156 
157  runtime->UnloadNetwork(netId);
158 }

◆ BOOST_AUTO_TEST_CASE() [2/4]

BOOST_AUTO_TEST_CASE ( ClImportDisabledFallbackToNeon  )

Definition at line 160 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().
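
With import disabled the network is loaded through the two-argument LoadNetwork overload, so the runtime copies tensor data across the GpuAcc/CpuAcc boundary (the CopyMemGeneric workload checked for below) instead of importing caller memory. A sketch of the contrast, given the runtime and optNet from the listing and assuming the 21.05 API used throughout this file:

// Import enabled (previous test case): request import of malloc'd memory.
//   INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
//   runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);

// Import disabled (this test case): default load, data is copied on enqueue.
runtime->LoadNetwork(netId, std::move(optNet));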

161 {
162  using namespace armnn;
163 
164  IRuntime::CreationOptions options;
165  IRuntimePtr runtime(IRuntime::Create(options));
166 
167  // Builds up the structure of the network.
168  INetworkPtr net(INetwork::Create());
169 
170  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
171  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
172  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
173  IConnectableLayer* add = net->AddAdditionLayer("add");
174  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
175  IConnectableLayer* output = net->AddOutputLayer(0, "output");
176 
177  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
178  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
179  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
180  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
181  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
182 
183  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
184 
185  input0->GetOutputSlot(0).SetTensorInfo(info);
186  input1->GetOutputSlot(0).SetTensorInfo(info);
187  input2->GetOutputSlot(0).SetTensorInfo(info);
188  add->GetOutputSlot(0).SetTensorInfo(info);
189  sub->GetOutputSlot(0).SetTensorInfo(info);
190 
191  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
192  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
193  sub->BackendSelectionHint(backends[1]);
194 
195  // optimize the network
196  OptimizerOptions optOptions;
197  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
198 
199  Graph& graph = GetGraphForTesting(optNet.get());
200 
201  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
202  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
203  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
204  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
205  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
206  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
207  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
208 
209  // Checks order is valid.
210  BOOST_TEST(CheckOrder(graph, layer0, layer1));
211  BOOST_TEST(CheckOrder(graph, layer1, layer2));
212  BOOST_TEST(CheckOrder(graph, layer2, layer3));
213  BOOST_TEST(CheckOrder(graph, layer3, layer4));
214  BOOST_TEST(CheckOrder(graph, layer4, layer5));
215  BOOST_TEST(CheckOrder(graph, layer5, layer6));
216 
217  // Use memory import between backends
218  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
219 
220  // Correctly use backend hint
221  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
222 
223  // Load it into the runtime. It should pass.
224  NetworkId netId;
225  runtime->LoadNetwork(netId, std::move(optNet));
226 
227  // Creates structures for input & output
228  std::vector<float> inputData0
229  {
230  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
231  };
232  std::vector<float> inputData1
233  {
234  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
235  };
236  std::vector<float> inputData2
237  {
238  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
239  };
240 
241  std::vector<float> outputData(12);
242 
243  std::vector<float> expectedOutput
244  {
245  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
246  };
247 
248  InputTensors inputTensors
249  {
250  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
251  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
252  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
253  };
254  OutputTensors outputTensors
255  {
256  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
257  };
258 
259  runtime->GetProfiler(netId)->EnableProfiling(true);
260 
261  // Do the inference
262  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
263 
264  // Retrieve the Profiler.Print() output to get the workload execution
265  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
266  std::stringstream ss;
267  profilerManager.GetProfiler()->Print(ss);
268  std::string dump = ss.str();
269 
270  // Executed Subtraction using CpuAcc
271  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
272  BOOST_TEST(found != std::string::npos);
273 
274  // Contain CopyMemGeneric
275  found = dump.find("CopyMemGeneric");
276  BOOST_TEST(found != std::string::npos);
277 
278  // Check output is as expected
279  BOOST_TEST(outputData == expectedOutput);
280 }

◆ BOOST_AUTO_TEST_CASE() [3/4]

BOOST_AUTO_TEST_CASE ( ClImportEnabledFallbackSubgraphToNeon  )

Definition at line 282 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), TensorInfo::GetNumElements(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ImportEnabled, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, armnn::Malloc, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().
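
This variant appends a Pooling2dLayer after the CpuAcc subtraction so that execution has to switch back to GpuAcc, exercising a fallback subgraph rather than a single layer. With a 2x2 window, stride 2, and the descriptor's defaults (assumed here to be Max pooling in NCHW layout), each spatial dimension halves, so the { 1, 2, 4, 2 } output of sub pools down to { 1, 2, 2, 1 }:

Pooling2dDescriptor desc;   // m_PoolType defaults to PoolingAlgorithm::Max
desc.m_PoolWidth  = 2;      // 2x2 pooling window
desc.m_PoolHeight = 2;
desc.m_StrideX    = 2;      // stride 2 halves the width:  2 -> 1
desc.m_StrideY    = 2;      // stride 2 halves the height: 4 -> 2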

283 {
284  using namespace armnn;
285 
286  IRuntime::CreationOptions options;
287  IRuntimePtr runtime(IRuntime::Create(options));
288 
289  // Builds up the structure of the network.
290  INetworkPtr net(INetwork::Create());
291 
292  Pooling2dDescriptor desc;
293  desc.m_PoolWidth = 2;
294  desc.m_PoolHeight = 2;
295  desc.m_StrideX = 2;
296  desc.m_StrideY = 2;
297 
298  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
299  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
300  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
301  IConnectableLayer* add = net->AddAdditionLayer("add");
302  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
303  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
304  IConnectableLayer* output = net->AddOutputLayer(0, "output");
305 
306  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
307  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
308  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
309  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
310  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
311  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
312 
313  TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
314  TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
315 
316  input0->GetOutputSlot(0).SetTensorInfo(info);
317  input1->GetOutputSlot(0).SetTensorInfo(info);
318  input2->GetOutputSlot(0).SetTensorInfo(info);
319  add->GetOutputSlot(0).SetTensorInfo(info);
320  sub->GetOutputSlot(0).SetTensorInfo(info);
321  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
322 
323  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
324  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
325  sub->BackendSelectionHint(backends[1]);
326 
327  // optimize the network
328  OptimizerOptions optOptions;
329  optOptions.m_ImportEnabled = true;
330  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
331 
332  Graph& graph = GetGraphForTesting(optNet.get());
333 
334  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
335  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
336  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
337  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
338  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
339  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
340  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
341  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
342  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
343 
344  // Checks order is valid.
345  BOOST_TEST(CheckOrder(graph, layer0, layer1));
346  BOOST_TEST(CheckOrder(graph, layer1, layer2));
347  BOOST_TEST(CheckOrder(graph, layer2, layer3));
348  BOOST_TEST(CheckOrder(graph, layer3, layer4));
349  BOOST_TEST(CheckOrder(graph, layer4, layer5));
350  BOOST_TEST(CheckOrder(graph, layer5, layer6));
351  BOOST_TEST(CheckOrder(graph, layer6, layer7));
352  BOOST_TEST(CheckOrder(graph, layer7, layer8));
353 
354  // Use memory import between backends
355  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
356  BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
357 
358  // Correctly use backend hint
359  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
360 
361  // Load it into the runtime. It should pass.
362  NetworkId netId;
363  std::string ignoredErrorMessage;
364  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
365  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
366 
367  // Creates structures for input & output
368  std::vector<float> inputValue0
369  {
370  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
371  };
372  std::vector<float> inputValue1
373  {
374  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
375  };
376  std::vector<float> inputData2
377  {
378  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
379  };
380 
381  std::vector<float> outputData(4);
382 
383  std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
384 
385  unsigned int numElements = info.GetNumElements();
386  size_t totalBytes = numElements * sizeof(float);
387  const size_t alignment = 64;
388  size_t space = totalBytes + alignment + alignment;
389  auto inputData0 = std::make_unique<uint8_t[]>(space);
390  void* alignedInputPtr0 = inputData0.get();
391  BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
392 
393  auto* inputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
394  std::copy(inputValue0.begin(), inputValue0.end(), inputPtr0);
395 
396  auto inputData1 = std::make_unique<uint8_t[]>(space);
397  void* alignedInputPtr1 = inputData1.get();
398  BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
399 
400  auto* inputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
401  std::copy(inputValue1.begin(), inputValue1.end(), inputPtr1);
402 
403  InputTensors inputTensors
404  {
405  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
406  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
407  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
408  };
409  OutputTensors outputTensors
410  {
411  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
412  };
413 
414  runtime->GetProfiler(netId)->EnableProfiling(true);
415 
416  // Do the inference
417  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
418 
419  // Retrieve the Profiler.Print() output to get the workload execution
420  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
421  std::stringstream ss;
422  profilerManager.GetProfiler()->Print(ss);
423  std::string dump = ss.str();
424 
425  // Executed Subtraction using CpuAcc
426  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
427  BOOST_TEST(found != std::string::npos);
428 
429  // Correctly switch back to GpuAcc
430  found = dump.find("ClPooling2dWorkload_Execute");
431  BOOST_TEST(found != std::string::npos);
432 
433  // Contain CopyMemGeneric
434  found = dump.find("CopyMemGeneric");
435  BOOST_TEST(found != std::string::npos);
436 
437  // Check output is as expected
438  BOOST_TEST(outputData == expectedOutput);
439 
440  runtime->UnloadNetwork(netId);
441 }

◆ BOOST_AUTO_TEST_CASE() [4/4]

BOOST_AUTO_TEST_CASE ( ClImportDisableFallbackSubgraphToNeon  )

Definition at line 443 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), BOOST_AUTO_TEST_SUITE_END(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().
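
The graph assertions rely on the naming convention for the copy layers the optimizer inserts at backend boundaries: each MemCopy layer is named after the connection it replaces, in the form "[ <source> (<output slot>) -> <destination> (<input slot>) ]". A sketch of the lookup pattern used below:

// Find the copy layer inserted between the CpuAcc sub layer and the
// GpuAcc pooling layer, then confirm it really is a MemCopy layer.
armnn::Layer* const copyLayer = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
BOOST_TEST((copyLayer->GetType() == LayerType::MemCopy));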

444 {
445  using namespace armnn;
446 
447  IRuntime::CreationOptions options;
448  IRuntimePtr runtime(IRuntime::Create(options));
449 
450  // Builds up the structure of the network.
451  INetworkPtr net(INetwork::Create());
452 
453  Pooling2dDescriptor desc;
454 
455  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
456  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
457  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
458  IConnectableLayer* add = net->AddAdditionLayer("add");
459  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
460  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
461  IConnectableLayer* output = net->AddOutputLayer(0, "output");
462 
463  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
464  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
465  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
466  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
467  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
468  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
469 
470  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
471  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
472 
473  input0->GetOutputSlot(0).SetTensorInfo(info);
474  input1->GetOutputSlot(0).SetTensorInfo(info);
475  input2->GetOutputSlot(0).SetTensorInfo(info);
476  add->GetOutputSlot(0).SetTensorInfo(info);
477  sub->GetOutputSlot(0).SetTensorInfo(info);
478  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
479 
480  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
481  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
482  sub->BackendSelectionHint(backends[1]);
483 
484  // optimize the network
485  OptimizerOptions optOptions;
486  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
487 
488  Graph& graph = GetGraphForTesting(optNet.get());
489 
490  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
491  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
492  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
493  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
494  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
495  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
496  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
497  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
498  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
499 
500  // Checks order is valid.
501  BOOST_TEST(CheckOrder(graph, layer0, layer1));
502  BOOST_TEST(CheckOrder(graph, layer1, layer2));
503  BOOST_TEST(CheckOrder(graph, layer2, layer3));
504  BOOST_TEST(CheckOrder(graph, layer3, layer4));
505  BOOST_TEST(CheckOrder(graph, layer4, layer5));
506  BOOST_TEST(CheckOrder(graph, layer5, layer6));
507  BOOST_TEST(CheckOrder(graph, layer6, layer7));
508  BOOST_TEST(CheckOrder(graph, layer7, layer8));
509 
510  // Use memory import between backends
511  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
512  BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
513 
514  // Correctly use backend hint
515  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
516 
517  // Load it into the runtime. It should pass.
518  NetworkId netId;
519  runtime->LoadNetwork(netId, std::move(optNet));
520 
521  // Creates structures for input & output
522  std::vector<float> inputData0
523  {
524  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
525  };
526  std::vector<float> inputData1
527  {
528  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
529  };
530  std::vector<float> inputData2
531  {
532  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
533  };
534 
535  std::vector<float> outputData(2);
536 
537  std::vector<float> expectedOutput{ 11.0f, -1.0f };
538 
539  InputTensors inputTensors
540  {
541  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
542  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
543  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
544  };
545  OutputTensors outputTensors
546  {
547  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
548  };
549 
550  runtime->GetProfiler(netId)->EnableProfiling(true);
551 
552  // Do the inference
553  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
554 
555  // Retrieve the Profiler.Print() output to get the workload execution
556  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
557  std::stringstream ss;
558  profilerManager.GetProfiler()->Print(ss);
559  std::string dump = ss.str();
560 
561  // Executed Subtraction using CpuAcc
562  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
563  BOOST_TEST(found != std::string::npos);
564 
565  // Correctly switch back to GpuAcc
566  found = dump.find("ClPooling2dWorkload_Execute");
567  BOOST_TEST(found != std::string::npos);
568 
569  // Contain CopyMemGeneric
570  found = dump.find("CopyMemGeneric");
571  BOOST_TEST(found != std::string::npos);
572 
573  // Check output is as expected
574  BOOST_TEST(outputData == expectedOutput);
575 }