ArmNN 21.02
NeonFallbackTests.cpp
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <backendsCommon/test/CommonTestUtils.hpp>
#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>

#include <test/GraphUtils.hpp>

#include <boost/test/unit_test.hpp>

BOOST_AUTO_TEST_SUITE(NeonFallback)

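// Each case below builds a small network that the optimizer must split across
// two backends, then checks that the resulting graph contains the expected
// inter-backend MemImport or MemCopy layer and that the profiler dump shows
// the matching ImportMemGeneric / CopyMemGeneric / SyncMemGeneric workloads.
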
BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

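    // INetworkProperties(importEnabled, exportEnabled): ask the runtime to
    // import the caller's input buffers and export into the caller's output
    // buffers instead of copying them.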
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

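// The "PaddingCopy" cases place a pooling workload on CpuAcc whose tensors
// carry padding; padded tensors cannot be imported across the backend
// boundary, so the optimizer has to fall back to a MemCopy layer.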
BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        6.0f, 12.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

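// The "FromCpuAcc" variants reverse the layer order so that the fallback
// layer feeds the MockRef layer, exercising the transfer in the opposite
// direction across the backend boundary.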
BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
    input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    add->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
    };
    std::vector<float> inputData1
    {
        -1.0f, 3.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        5.0f, 15.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuRef.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

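    // Import is disabled in this case: Optimize() was called without setting
    // OptimizerOptions::m_ImportEnabled and the network is loaded with
    // INetworkProperties(false, false), so a copy is expected at the boundary.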
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, false);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

#if defined(ARMCOMPUTECL_ENABLED)
BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

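    // The hint is only a preference for the optimizer: "sub" is placed on
    // GpuAcc while the surrounding layers stay on CpuAcc, creating a
    // CpuAcc/GpuAcc boundary in the optimized graph.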
    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

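    // No INetworkProperties are passed to LoadNetwork here, so import and
    // export stay at their defaults (disabled) and data is copied across the
    // backend boundary.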
    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

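    // There are two boundaries (CpuAcc -> GpuAcc into "sub" and GpuAcc ->
    // CpuAcc into "pooling"), so a MemCopy layer is expected on each side of
    // the GpuAcc subgraph even though import is enabled.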
    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}
#endif

BOOST_AUTO_TEST_SUITE_END()