ArmNN 22.02
ClFallbackTests.cpp File Reference
#include <CommonTestUtils.hpp>
#include <GraphUtils.hpp>
#include <doctest/doctest.h>


Functions

 TEST_SUITE ("ClFallback")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "ClFallback"  )

Definition at line 12 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), TensorInfo::GetNumElements(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ImportEnabled, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, armnn::Malloc, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), TensorInfo::SetConstant(), and IOutputSlot::SetTensorInfo().
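All four test cases follow the same pattern: build a small Add/Sub network, prefer GpuAcc, and use IConnectableLayer::BackendSelectionHint() to pin one layer to CpuAcc, forcing the optimizer to stitch the two backends together with a MemCopy layer. The following condensed sketch (not part of this file) shows that pattern in isolation; it assumes an ArmNN build with both the GpuAcc (CL) and CpuAcc (Neon) backends, and the main() wrapper and tensor shape are illustrative only.

#include <armnn/ArmNN.hpp>
#include <vector>

int main()
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add    = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info({ 1, 2, 4, 2 }, DataType::Float32);
    info.SetConstant(true);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // Prefer GpuAcc overall, but pin the Addition layer to CpuAcc;
    // the optimizer then inserts a MemCopy layer at the backend boundary.
    add->BackendSelectionHint(Compute::CpuAcc);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true; // request zero-copy input buffers where supported
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
    return optNet ? 0 : 1;
}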

13 {
14 TEST_CASE("ClImportEnabledFallbackToNeon")
15 {
16  using namespace armnn;
17 
18  IRuntime::CreationOptions options;
19  IRuntimePtr runtime(IRuntime::Create(options));
20 
21  // Builds up the structure of the network.
22  INetworkPtr net(INetwork::Create());
23 
24  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
25  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
26  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
27  IConnectableLayer* add = net->AddAdditionLayer("add");
28  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
29  IConnectableLayer* output = net->AddOutputLayer(0, "output");
30 
31  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
32  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
33  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
34  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
35  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
36 
37  TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
38  info.SetConstant(true);
39 
40  input0->GetOutputSlot(0).SetTensorInfo(info);
41  input1->GetOutputSlot(0).SetTensorInfo(info);
42  input2->GetOutputSlot(0).SetTensorInfo(info);
43  add->GetOutputSlot(0).SetTensorInfo(info);
44  sub->GetOutputSlot(0).SetTensorInfo(info);
45 
46  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
47  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
48  sub->BackendSelectionHint(backends[1]);
49 
50  // optimize the network
51  OptimizerOptions optOptions;
52  optOptions.m_ImportEnabled = true;
53  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
54 
55  Graph& graph = GetGraphForTesting(optNet.get());
56 
57  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
58  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
59  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
60  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
61  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
62  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
63  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
64 
65  // Checks order is valid.
66  CHECK(CheckOrder(graph, layer0, layer1));
67  CHECK(CheckOrder(graph, layer1, layer2));
68  CHECK(CheckOrder(graph, layer2, layer3));
69  CHECK(CheckOrder(graph, layer3, layer4));
70  CHECK(CheckOrder(graph, layer4, layer5));
71  CHECK(CheckOrder(graph, layer5, layer6));
72 
73  // A MemCopy layer is inserted at the GpuAcc/CpuAcc boundary
74  CHECK((layer4->GetType() == LayerType::MemCopy));
75 
76  // Correctly use backend hint
77  CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
78 
79  // Load it into the runtime. It should pass.
80  NetworkId netId;
81  std::string ignoredErrorMessage;
82  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
83  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
84 
85  // Creates structures for input & output
86  std::vector<float> inputValue0
87  {
88  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
89  };
90  std::vector<float> inputValue1
91  {
92  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
93  };
94  std::vector<float> inputData2
95  {
96  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
97  };
98 
99  std::vector<float> outputData(16);
100 
101  std::vector<float> expectedOutput
102  {
103  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
104  };
105 
106  // Prepare aligned data
107  unsigned int numElements = info.GetNumElements();
108  size_t totalBytes = numElements * sizeof(float);
109  const size_t alignment = 64;
110  size_t space = totalBytes + alignment + alignment;
111  auto inputData0 = std::make_unique<uint8_t[]>(space);
112  void* alignedInputPtr0 = inputData0.get();
113  CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
114 
115  auto* inputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
116  std::copy(inputValue0.begin(), inputValue0.end(), inputPtr0);
117 
118  auto inputData1 = std::make_unique<uint8_t[]>(space);
119  void* alignedInputPtr1 = inputData1.get();
120  CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
121 
122  auto* inputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
123  std::copy(inputValue1.begin(), inputValue1.end(), inputPtr1);
124 
125  InputTensors inputTensors
126  {
127  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
128  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
129  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
130  };
131  OutputTensors outputTensors
132  {
133  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
134  };
135 
136  runtime->GetProfiler(netId)->EnableProfiling(true);
137 
138  // Do the inference
139  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
140 
141  // Retrieve the Profiler.Print() output to check the workload execution
142  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
143  std::stringstream ss;
144  profilerManager.GetProfiler()->Print(ss);
145  std::string dump = ss.str();
146 
147  // Executed Subtraction using CpuAcc
148  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
149  CHECK(found != std::string::npos);
150 
151  // Contain CopyMemGeneric
152  found = dump.find("CopyMemGeneric");
153  CHECK(found != std::string::npos);
154 
155  // Check output is as expected
156  CHECK(outputData == expectedOutput);
157 
158  runtime->UnloadNetwork(netId);
159 }
160 
161 TEST_CASE("ClImportDisabledFallbackToNeon")
162 {
163  using namespace armnn;
164 
165  IRuntime::CreationOptions options;
166  IRuntimePtr runtime(IRuntime::Create(options));
167 
168  // Builds up the structure of the network.
169  INetworkPtr net(INetwork::Create());
170 
171  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
172  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
173  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
174  IConnectableLayer* add = net->AddAdditionLayer("add");
175  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
176  IConnectableLayer* output = net->AddOutputLayer(0, "output");
177 
178  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
179  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
180  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
181  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
182  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
183 
184  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
185  info.SetConstant(true);
186 
187  input0->GetOutputSlot(0).SetTensorInfo(info);
188  input1->GetOutputSlot(0).SetTensorInfo(info);
189  input2->GetOutputSlot(0).SetTensorInfo(info);
190  add->GetOutputSlot(0).SetTensorInfo(info);
191  sub->GetOutputSlot(0).SetTensorInfo(info);
192 
193  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
194  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
195  sub->BackendSelectionHint(backends[1]);
196 
197  // optimize the network
198  OptimizerOptions optOptions;
199  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
200 
201  Graph& graph = GetGraphForTesting(optNet.get());
202 
203  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
204  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
205  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
206  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
207  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
208  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
209  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
210 
211  // Checks order is valid.
212  CHECK(CheckOrder(graph, layer0, layer1));
213  CHECK(CheckOrder(graph, layer1, layer2));
214  CHECK(CheckOrder(graph, layer2, layer3));
215  CHECK(CheckOrder(graph, layer3, layer4));
216  CHECK(CheckOrder(graph, layer4, layer5));
217  CHECK(CheckOrder(graph, layer5, layer6));
218 
219  // A MemCopy layer is inserted at the GpuAcc/CpuAcc boundary
220  CHECK((layer4->GetType() == LayerType::MemCopy));
221 
222  // Correctly use backend hint
223  CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
224 
225  // Load it into the runtime. It should pass.
226  NetworkId netId;
227  runtime->LoadNetwork(netId, std::move(optNet));
228 
229  // Creates structures for input & output
230  std::vector<float> inputData0
231  {
232  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
233  };
234  std::vector<float> inputData1
235  {
236  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
237  };
238  std::vector<float> inputData2
239  {
240  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
241  };
242 
243  std::vector<float> outputData(12);
244 
245  std::vector<float> expectedOutput
246  {
247  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
248  };
249 
250  InputTensors inputTensors
251  {
252  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
253  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
254  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
255  };
256  OutputTensors outputTensors
257  {
258  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
259  };
260 
261  runtime->GetProfiler(netId)->EnableProfiling(true);
262 
263  // Do the inference
264  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
265 
266  // Retrieve the Profiler.Print() output to check the workload execution
267  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
268  std::stringstream ss;
269  profilerManager.GetProfiler()->Print(ss);
270  std::string dump = ss.str();
271 
272  // Executed Subtraction using CpuAcc
273  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
274  CHECK(found != std::string::npos);
275 
276  // Contain CopyMemGeneric
277  found = dump.find("CopyMemGeneric");
278  CHECK(found != std::string::npos);
279 
280  // Check output is as expected
281  CHECK(outputData == expectedOutput);
282 }
283 
284 TEST_CASE("ClImportEnabledFallbackSubgraphToNeon")
285 {
286  using namespace armnn;
287 
288  IRuntime::CreationOptions options;
289  IRuntimePtr runtime(IRuntime::Create(options));
290 
291  // Builds up the structure of the network.
292  INetworkPtr net(INetwork::Create());
293 
294  Pooling2dDescriptor desc;
295  desc.m_PoolWidth = 2;
296  desc.m_PoolHeight = 2;
297  desc.m_StrideX = 2;
298  desc.m_StrideY = 2;
299 
300  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
301  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
302  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
303  IConnectableLayer* add = net->AddAdditionLayer("add");
304  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
305  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
306  IConnectableLayer* output = net->AddOutputLayer(0, "output");
307 
308  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
309  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
310  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
311  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
312  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
313  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
314 
315  TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
316  info.SetConstant(true);
317  TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
318 
319  input0->GetOutputSlot(0).SetTensorInfo(info);
320  input1->GetOutputSlot(0).SetTensorInfo(info);
321  input2->GetOutputSlot(0).SetTensorInfo(info);
322  add->GetOutputSlot(0).SetTensorInfo(info);
323  sub->GetOutputSlot(0).SetTensorInfo(info);
324  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
325 
326  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
327  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
328  sub->BackendSelectionHint(backends[1]);
329 
330  // optimize the network
331  OptimizerOptions optOptions;
332  optOptions.m_ImportEnabled = true;
333  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
334 
335  Graph& graph = GetGraphForTesting(optNet.get());
336 
337  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
338  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
339  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
340  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
341  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
342  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
343  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
344  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
345  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
346 
347  // Checks order is valid.
348  CHECK(CheckOrder(graph, layer0, layer1));
349  CHECK(CheckOrder(graph, layer1, layer2));
350  CHECK(CheckOrder(graph, layer2, layer3));
351  CHECK(CheckOrder(graph, layer3, layer4));
352  CHECK(CheckOrder(graph, layer4, layer5));
353  CHECK(CheckOrder(graph, layer5, layer6));
354  CHECK(CheckOrder(graph, layer6, layer7));
355  CHECK(CheckOrder(graph, layer7, layer8));
356 
357  // A MemCopy layer is inserted at each GpuAcc/CpuAcc boundary
358  CHECK((layer4->GetType() == LayerType::MemCopy));
359  CHECK((layer6->GetType() == LayerType::MemCopy));
360 
361  // Correctly use backend hint
362  CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
363 
364  // Load it into the runtime. It should pass.
365  NetworkId netId;
366  std::string ignoredErrorMessage;
367  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
368  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
369 
370  // Creates structures for input & output
371  std::vector<float> inputValue0
372  {
373  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
374  };
375  std::vector<float> inputValue1
376  {
377  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
378  };
379  std::vector<float> inputData2
380  {
381  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
382  };
383 
384  std::vector<float> outputData(4);
385 
386  std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
387 
388  unsigned int numElements = info.GetNumElements();
389  size_t totalBytes = numElements * sizeof(float);
390  const size_t alignment = 64;
391  size_t space = totalBytes + alignment + alignment;
392  auto inputData0 = std::make_unique<uint8_t[]>(space);
393  void* alignedInputPtr0 = inputData0.get();
394  CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
395 
396  auto* inputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
397  std::copy(inputValue0.begin(), inputValue0.end(), inputPtr0);
398 
399  auto inputData1 = std::make_unique<uint8_t[]>(space);
400  void* alignedInputPtr1 = inputData1.get();
401  CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
402 
403  auto* inputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
404  std::copy(inputValue1.begin(), inputValue1.end(), inputPtr1);
405 
406  InputTensors inputTensors
407  {
408  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
409  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
410  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
411  };
412  OutputTensors outputTensors
413  {
414  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
415  };
416 
417  runtime->GetProfiler(netId)->EnableProfiling(true);
418 
419  // Do the inference
420  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
421 
422  // Retrieve the Profiler.Print() output to check the workload execution
423  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
424  std::stringstream ss;
425  profilerManager.GetProfiler()->Print(ss);
426  std::string dump = ss.str();
427 
428  // Executed Subtraction using CpuAcc
429  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
430  CHECK(found != std::string::npos);
431 
432  // Correctly switch back to GpuAcc
433  found = dump.find("ClPooling2dWorkload_Execute");
434  CHECK(found != std::string::npos);
435 
436  // Contain CopyMemGeneric
437  found = dump.find("CopyMemGeneric");
438  CHECK(found != std::string::npos);
439 
440  // Check output is as expected
441  CHECK(outputData == expectedOutput);
442 
443  runtime->UnloadNetwork(netId);
444 }
445 
446 TEST_CASE("ClImportDisableFallbackSubgraphToNeon")
447 {
448  using namespace armnn;
449 
450  IRuntime::CreationOptions options;
451  IRuntimePtr runtime(IRuntime::Create(options));
452 
453  // Builds up the structure of the network.
454  INetworkPtr net(INetwork::Create());
455 
456  Pooling2dDescriptor desc;
457 
458  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
459  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
460  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
461  IConnectableLayer* add = net->AddAdditionLayer("add");
462  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
463  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
464  IConnectableLayer* output = net->AddOutputLayer(0, "output");
465 
466  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
467  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
468  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
469  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
470  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
471  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
472 
473  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
474  info.SetConstant(true);
475  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
476 
477  input0->GetOutputSlot(0).SetTensorInfo(info);
478  input1->GetOutputSlot(0).SetTensorInfo(info);
479  input2->GetOutputSlot(0).SetTensorInfo(info);
480  add->GetOutputSlot(0).SetTensorInfo(info);
481  sub->GetOutputSlot(0).SetTensorInfo(info);
482  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
483 
484  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
485  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
486  sub->BackendSelectionHint(backends[1]);
487 
488  // optimize the network
489  OptimizerOptions optOptions;
490  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
491 
492  Graph& graph = GetGraphForTesting(optNet.get());
493 
494  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
495  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
496  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
497  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
498  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
499  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
500  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
501  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
502  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
503 
504  // Checks order is valid.
505  CHECK(CheckOrder(graph, layer0, layer1));
506  CHECK(CheckOrder(graph, layer1, layer2));
507  CHECK(CheckOrder(graph, layer2, layer3));
508  CHECK(CheckOrder(graph, layer3, layer4));
509  CHECK(CheckOrder(graph, layer4, layer5));
510  CHECK(CheckOrder(graph, layer5, layer6));
511  CHECK(CheckOrder(graph, layer6, layer7));
512  CHECK(CheckOrder(graph, layer7, layer8));
513 
514  // A MemCopy layer is inserted at each GpuAcc/CpuAcc boundary
515  CHECK((layer4->GetType() == LayerType::MemCopy));
516  CHECK((layer6->GetType() == LayerType::MemCopy));
517 
518  // Correctly use backend hint
519  CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
520 
521  // Load it into the runtime. It should pass.
522  NetworkId netId;
523  runtime->LoadNetwork(netId, std::move(optNet));
524 
525  // Creates structures for input & output
526  std::vector<float> inputData0
527  {
528  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
529  };
530  std::vector<float> inputData1
531  {
532  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
533  };
534  std::vector<float> inputData2
535  {
536  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
537  };
538 
539  std::vector<float> outputData(2);
540 
541  std::vector<float> expectedOutput{ 11.0f, -1.0f };
542 
543  InputTensors inputTensors
544  {
545  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
546  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
547  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
548  };
549  OutputTensors outputTensors
550  {
551  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
552  };
553 
554  runtime->GetProfiler(netId)->EnableProfiling(true);
555 
556  // Do the inference
557  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
558 
559  // Retrieve the Profiler.Print() output to check the workload execution
560  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
561  std::stringstream ss;
562  profilerManager.GetProfiler()->Print(ss);
563  std::string dump = ss.str();
564 
565  // Executed Subtraction using CpuAcc
566  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
567  CHECK(found != std::string::npos);
568 
569  // Correctly switch back to GpuAcc
570  found = dump.find("ClPooling2dWorkload_Execute");
571  CHECK(found != std::string::npos);
572 
573  // Contain CopyMemGeneric
574  found = dump.find("CopyMemGeneric");
575  CHECK(found != std::string::npos);
576 
577  // Check output is as expected
578  CHECK(outputData == expectedOutput);
579 }
580 
581 }
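The import-enabled cases stage their inputs in 64-byte-aligned buffers (lines 106-123 and 388-404 of the listing) so that the CL backend can map the host memory directly instead of copying it. That preparation distils into a small helper; the helper name PrepareAlignedBuffer and the 64-byte default are illustrative assumptions here, not ArmNN API.

#include <algorithm>
#include <cstdint>
#include <memory>
#include <vector>

// Copy 'values' into freshly allocated storage whose payload starts on an
// 'alignment'-byte boundary; 'storage' keeps the allocation alive for the
// whole inference. Returns nullptr if alignment fails. (64 bytes is an
// assumed safe value for typical OpenCL device alignment requirements.)
float* PrepareAlignedBuffer(std::unique_ptr<uint8_t[]>& storage,
                            const std::vector<float>& values,
                            size_t alignment = 64)
{
    size_t totalBytes = values.size() * sizeof(float);
    size_t space = totalBytes + 2 * alignment;
    storage = std::make_unique<uint8_t[]>(space);
    void* aligned = storage.get();
    if (std::align(alignment, totalBytes, aligned, space) == nullptr)
    {
        return nullptr;
    }
    auto* floatPtr = static_cast<float*>(aligned);
    std::copy(values.begin(), values.end(), floatPtr);
    return floatPtr;
}

The returned pointer is what the tests wrap in an armnn::ConstTensor (alignedInputPtr0, alignedInputPtr1) before calling EnqueueWorkload().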