ArmNN 22.05.01
NeonFallbackTests.cpp
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <CommonTestUtils.hpp>
#include <MockImportBackend.hpp> // assumed header path for the MockRef import backend used below

#include <GraphUtils.hpp>

#include <doctest/doctest.h>

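// These tests exercise heterogeneous execution around the Neon (CpuAcc) backend:
// each case splits a small graph across two backends and checks how tensors cross
// the backend boundary (memory import vs. memory copy), using both the layers the
// optimizer inserts at the boundary and the profiler dump of executed workloads.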
TEST_SUITE("NeonFallback")
{
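// Import path: with import enabled, the boundary between the MockRef subgraph and
// the CpuAcc subgraph should become a MemImport layer, and the profile should show
// ImportMemGeneric/SyncMemGeneric workloads and no CopyMemGeneric.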
TEST_CASE("FallbackImportToCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Assumed reconstruction: import via malloc'd memory, matching the ImportMemGeneric checks below.
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    // Use memory import between backends
    CHECK((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

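// Copy path: the pooling layer makes the boundary tensor unsuitable for import
// (the "padding" case), so the optimizer should insert a MemCopy layer and the
// profile should show CopyMemGeneric instead of ImportMemGeneric.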
TEST_CASE("FallbackPaddingCopyToCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Assumed reconstruction: import requested via malloc'd memory.
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        6.0f, 12.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    CHECK(found == std::string::npos);

    // Use memory copy between backends
    CHECK((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

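// Same import check with the subgraphs swapped: the CpuAcc subgraph now feeds the
// MockRef subgraph, and the boundary should still become a MemImport layer.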
TEST_CASE("FallbackImportFromCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    // Assumed reconstruction: import via malloc'd memory, as in the tests above.
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    // Use memory import between backends
    CHECK((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

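// Copy variant of the previous test: the pooled output coming back from the
// fallback backend cannot be imported, so a MemCopy layer and a CopyMemGeneric
// workload are expected at the boundary.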
TEST_CASE("FallbackPaddingCopyFromCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
    input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    add->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Assumed reconstruction: import requested via malloc'd memory.
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
    };
    std::vector<float> inputData1
    {
        -1.0f, 3.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        5.0f, 15.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    CHECK(found == std::string::npos);

    // Use memory copy between backends
    CHECK((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

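// With import left disabled at optimization time, the same sub/add split must use
// a MemCopy layer at the boundary even though the graph would be importable.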
TEST_CASE("FallbackDisableImportFromCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // Optimize the network (default options, so import stays disabled)
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Assumed reconstruction: no import sources, since import is disabled here.
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    CHECK(found == std::string::npos);

    // Use memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

#if defined(ARMCOMPUTECL_ENABLED)
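// CL fallback: the subtraction layer is pinned to GpuAcc via BackendSelectionHint,
// so data crosses a CpuAcc/GpuAcc boundary; even with import enabled this boundary
// uses a MemCopy layer and a CopyMemGeneric workload.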
TEST_CASE("NeonImportEnabledFallbackToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Use memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    // Assumed reconstruction: import via malloc'd memory for the aligned input below.
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
    };

    std::vector<float> outputData(16);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
    };

    unsigned int numElements = info.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);

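    // Importing a buffer requires it to satisfy the backend's alignment constraints,
    // so over-allocate and use std::align to find a 64-byte aligned region inside
    // the allocation for the input that will be imported.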
    // Prepare aligned data
    const size_t alignment = 64;
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr = inputData.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));

    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    std::copy(inputData2.begin(), inputData2.end(), inputPtr);

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    for (unsigned int i = 0; i < numElements; ++i)
    {
        CHECK(outputData[i] == expectedOutput[i]);
    }
    runtime->UnloadNetwork(netId);
}

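// Same CpuAcc/GpuAcc split without import: the network is loaded without import
// properties, so plain vectors are used as inputs and the boundary still goes
// through CopyMemGeneric.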
TEST_CASE("NeonImportDisabledFallbackToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Use memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

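// Fallback of a middle subgraph: add (CpuAcc) -> sub (GpuAcc) -> pooling (CpuAcc),
// so a MemCopy layer is expected on each side of the GpuAcc subgraph and the
// pooling should execute back on Neon.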
TEST_CASE("NeonImportEnabledFallbackSubgraphToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;
    desc.m_PoolWidth = 2;
    desc.m_PoolHeight = 2;
    desc.m_StrideX = 2;
    desc.m_StrideY = 2;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));
    CHECK(CheckOrder(graph, layer6, layer7));
    CHECK(CheckOrder(graph, layer7, layer8));

    // Use memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));
    CHECK((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    // Assumed reconstruction: import via malloc'd memory for the aligned input below.
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };

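    // As in NeonImportEnabledFallbackToCl, the buffer for the imported input must be
    // 64-byte aligned, so carve an aligned region out of a larger allocation.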
    // Prepare aligned data
    unsigned int numElements = info.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);
    const size_t alignment = 64;
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr = inputData.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));

    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    std::copy(inputData2.begin(), inputData2.end(), inputPtr);

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric for output
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);
    runtime->UnloadNetwork(netId);
}

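// Import-disabled variant of the subgraph fallback: the GpuAcc subgraph still runs
// ClSubtractionWorkload and hands its result back to Neon via CopyMemGeneric.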
TEST_CASE("NeonImportDisableFallbackSubgraphToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));
    CHECK(CheckOrder(graph, layer6, layer7));
    CHECK(CheckOrder(graph, layer7, layer8));

    // Use memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));
    CHECK((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}
#endif

}