ArmNN 20.11
NeonFallbackTests.cpp
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <backendsCommon/test/CommonTestUtils.hpp>
#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>

#include <test/GraphUtils.hpp>

#include <boost/test/unit_test.hpp>
BOOST_AUTO_TEST_SUITE(NeonFallback)
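
// These cases build small networks whose layers are split across two backends and check
// how the optimizer stitches the subgraphs back together: with a MemImport layer when
// zero-copy memory import is possible, or with a MemCopy layer when it is not.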

BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
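    // With m_ImportEnabled set, the optimizer is expected to join the MockRef and CpuAcc
    // subgraphs with a zero-copy MemImport layer rather than a MemCopy layer; this is
    // verified against the optimized graph and the profiler output below.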

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);
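    // INetworkProperties(importEnabled, exportEnabled): both flags are set so the runtime
    // may import caller-owned input buffers and export directly into the output buffers
    // instead of copying.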

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();
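    // Each executed workload appears by name in the profiler dump, so searching the dump
    // shows which memory-handling workloads actually ran.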

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

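    // The Neon pooling workload allocates padded tensors here, so the MockRef -> CpuAcc
    // boundary cannot use zero-copy import and is expected to fall back to a MemCopy
    // layer even though import is requested below.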
    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        6.0f, 12.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Uses memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
    input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    add->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
    };
    std::vector<float> inputData1
    {
        -1.0f, 3.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        5.0f, 15.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Uses memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
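    // No OptimizerOptions are passed, so m_ImportEnabled keeps its default of false and
    // the optimizer is expected to insert a MemCopy layer at the backend boundary.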

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, false);
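    // Import and export are also disabled at load time, matching the copy-based
    // optimization above.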

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Uses memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

#if defined(ARMCOMPUTECL_ENABLED)
BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Uses memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
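    // Even with m_ImportEnabled set, the CpuAcc -> GpuAcc boundary still uses a copy:
    // the CL workloads in this path cannot import the Neon-side memory directly.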

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Uses memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));
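    // This LoadNetwork overload takes no INetworkProperties, so memory import and
    // export default to disabled.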

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Uses memory copy between backends on both sides of the GpuAcc subgraph
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Uses memory copy between backends on both sides of the GpuAcc subgraph
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}
#endif

BOOST_AUTO_TEST_SUITE_END()