ArmNN 21.08
NeonFallbackTests.cpp
1 //
2 // Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include <backendsCommon/test/CommonTestUtils.hpp>               // assumed path: provides CreateBackendObject and the graph test helpers
7 #include <backendsCommon/test/mockBackend/MockImportBackend.hpp> // assumed path: provides the MockImport backend registered below
8 
9 #include <test/GraphUtils.hpp>
10 
11 #include <doctest/doctest.h>
12 
13 TEST_SUITE("NeonFallback")
14 {
15 TEST_CASE("FallbackImportToCpuAcc")
16 {
17  using namespace armnn;
18 
19  // Create a mock backend object
20  MockImportBackendInitialiser initialiser; // Register the Mock Backend
21  auto backendObjPtr = CreateBackendObject(MockImportBackendId());
22  CHECK((backendObjPtr != nullptr));
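 // The mock backend registers itself under the id "MockRef". In these tests it is assumed to act as an
 // import-capable reference backend, so boundaries between it and CpuAcc can use memory import.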
23 
24  BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
25  if (backendIds.find("MockRef") == backendIds.end())
26  {
27  std::string message = "Cannot load MockRef";
28  FAIL(message);
29  }
30 
31  // Create runtime in which test will run and allow fallback to CpuAcc.
32  IRuntime::CreationOptions options;
33  IRuntimePtr runtime(IRuntime::Create(options));
34 
35  // Builds up the structure of the network.
36  INetworkPtr net(INetwork::Create());
37 
38  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
39  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
40  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
41  IConnectableLayer* add = net->AddAdditionLayer("add");
42  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
43  IConnectableLayer* output = net->AddOutputLayer(0, "output");
44 
45  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
48  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
49  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
50 
51  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
52 
53  input0->GetOutputSlot(0).SetTensorInfo(info);
54  input1->GetOutputSlot(0).SetTensorInfo(info);
55  input2->GetOutputSlot(0).SetTensorInfo(info);
56  add->GetOutputSlot(0).SetTensorInfo(info);
57  sub->GetOutputSlot(0).SetTensorInfo(info);
58 
59  // optimize the network
60  std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
61  OptimizerOptions optOptions;
62  optOptions.m_ImportEnabled = true;
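 // With m_ImportEnabled set, the optimizer may insert MemImport (rather than MemCopy) layers at backend
 // boundaries, letting compatible tensors be shared by pointer instead of being copied.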
63  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
64 
65  Graph& graph = GetGraphForTesting(optNet.get());
66 
67  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
68  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
69  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
70  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
71  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
72  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
73  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
74 
75  // Checks order is valid.
76  CHECK(CheckOrder(graph, layer0, layer1));
77  CHECK(CheckOrder(graph, layer1, layer2));
78  CHECK(CheckOrder(graph, layer2, layer3));
79  CHECK(CheckOrder(graph, layer3, layer4));
80  CHECK(CheckOrder(graph, layer4, layer5));
81  CHECK(CheckOrder(graph, layer5, layer6));
82 
83  // Load it into the runtime. It should pass.
84  NetworkId netId;
85  std::string ignoredErrorMessage;
86  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); // assumed: import-capable Malloc memory sources
87  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
88 
89  // Creates structures for input & output
90  std::vector<float> inputData0
91  {
92  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
93  };
94  std::vector<float> inputData1
95  {
96  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
97  };
98  std::vector<float> inputData2
99  {
100  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
101  };
102 
103  std::vector<float> outputData(12);
104 
105  std::vector<float> expectedOutput
106  {
107  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
108  };
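 // Expected values: sub = input2 - (input0 + input1), computed element-wise.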
109 
110  InputTensors inputTensors
111  {
112  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
113  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
114  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
115  };
116  OutputTensors outputTensors
117  {
118  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
119  };
120 
121  runtime->GetProfiler(netId)->EnableProfiling(true);
122 
123  // Do the inference
124  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
125 
126  // Retrieve the Profiler.Print() output to get the workload execution
127  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
128  std::stringstream ss;
129  profilerManager.GetProfiler()->Print(ss);
130  std::string dump = ss.str();
131 
132  // Contains ImportMemGeneric
133  std::size_t found = dump.find("ImportMemGeneric");
134  CHECK(found != std::string::npos);
135 
136  // Contains SyncMemGeneric
137  found = dump.find("SyncMemGeneric");
138  CHECK(found != std::string::npos);
139 
140  // Does not contain CopyMemGeneric
141  found = dump.find("CopyMemGeneric");
142  CHECK(found == std::string::npos);
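 // ImportMemGeneric plus SyncMemGeneric with no CopyMemGeneric indicates the boundary tensor was shared
 // by pointer (zero copy) between the MockRef and CpuAcc backends.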
143 
144  // Use memory import between backends
145  CHECK((layer4->GetType() == LayerType::MemImport));
146 
147  // Check output is as expected
148  CHECK(outputData == expectedOutput);
149 }
150 
151 TEST_CASE("FallbackPaddingCopyToCpuAcc")
152 {
153  using namespace armnn;
154 
155  // Create a mock backend object
156  MockImportBackendInitialiser initialiser; // Register the Mock Backend
157  auto backendObjPtr = CreateBackendObject(MockImportBackendId());
158  CHECK((backendObjPtr != nullptr));
159 
160  BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
161  if (backendIds.find("MockRef") == backendIds.end())
162  {
163  std::string message = "Cannot load MockRef";
164  FAIL(message);
165  }
166 
167  // Create runtime in which test will run and allow fallback to CpuAcc.
168  IRuntime::CreationOptions options;
169  IRuntimePtr runtime(IRuntime::Create(options));
170 
171  // Builds up the structure of the network.
172  INetworkPtr net(INetwork::Create());
173 
174  Pooling2dDescriptor desc;
175 
176  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
177  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
178  IConnectableLayer* add = net->AddAdditionLayer("add");
179  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
180  IConnectableLayer* output = net->AddOutputLayer(0, "output");
181 
182  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
183  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
184  add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
185  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
186 
187  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
188  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
189 
190  input0->GetOutputSlot(0).SetTensorInfo(info);
191  input1->GetOutputSlot(0).SetTensorInfo(info);
192  add->GetOutputSlot(0).SetTensorInfo(info);
193  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
194 
195  // optimize the network
196  std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
197  OptimizerOptions optOptions;
198  optOptions.m_ImportEnabled = true;
199  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
200 
201  Graph& graph = GetGraphForTesting(optNet.get());
202 
203  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
204  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
205  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
206  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
207  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
208  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
209 
210  // Checks order is valid.
211  CHECK(CheckOrder(graph, layer0, layer1));
212  CHECK(CheckOrder(graph, layer1, layer2));
213  CHECK(CheckOrder(graph, layer2, layer3));
214  CHECK(CheckOrder(graph, layer3, layer4));
215  CHECK(CheckOrder(graph, layer4, layer5));
216 
217  // Load it into the runtime. It should pass.
218  NetworkId netId;
219  std::string ignoredErrorMessage;
220  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); // assumed: import-capable Malloc memory sources
221 
222  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
223 
224  // Creates structures for input & output
225  std::vector<float> inputData0
226  {
227  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
228  };
229  std::vector<float> inputData1
230  {
231  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
232  };
233 
234  std::vector<float> outputData(2);
235 
236  std::vector<float> expectedOutput
237  {
238  6.0f, 12.0f
239  };
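 // Expected values: add = input0 + input1 gives 1..12; the 3x2-to-1x1 pooling reduces each channel to a
 // single value, giving { 6, 12 } (the per-channel maximum for this data).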
240 
241  InputTensors inputTensors
242  {
243  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
244  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
245  };
246  OutputTensors outputTensors
247  {
248  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
249  };
250 
251  runtime->GetProfiler(netId)->EnableProfiling(true);
252 
253  // Do the inference
254  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
255 
256  // Retrieve the Profiler.Print() output to get the workload execution
257  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
258  std::stringstream ss;
259  profilerManager.GetProfiler()->Print(ss);
260  std::string dump = ss.str();
261 
262  // Contains CopyMemGeneric between the backends
263  std::size_t found = dump.find("CopyMemGeneric");
264  CHECK(found != std::string::npos);
265 
266  // Contains SyncMemGeneric for the output
267  found = dump.find("SyncMemGeneric");
268  CHECK(found != std::string::npos);
269 
270  // Does not contain ImportMemGeneric
271  found = dump.find("ImportMemGeneric");
272  CHECK(found == std::string::npos);
273 
274  // Use memory copy between backends
275  CHECK((layer3->GetType() == LayerType::MemCopy));
276 
277  // Check output is as expected
278  CHECK(outputData == expectedOutput);
279 }
280 
281 TEST_CASE("FallbackImportFromCpuAcc")
282 {
283  using namespace armnn;
284 
285  // Create a mock backend object
286  MockImportBackendInitialiser initialiser; // Register the Mock Backend
287  auto backendObjPtr = CreateBackendObject(MockImportBackendId());
288  CHECK((backendObjPtr != nullptr));
289 
290  BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
291  if (backendIds.find("MockRef") == backendIds.end())
292  {
293  std::string message = "Cannot load MockRef";
294  FAIL(message);
295  }
296 
297  // Create runtime in which test will run and allow fallback to CpuAcc.
298  IRuntime::CreationOptions options;
299  IRuntimePtr runtime(IRuntime::Create(options));
300 
301  // Builds up the structure of the network.
302  INetworkPtr net(INetwork::Create());
303 
304  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
305  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
306  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
307  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
308  IConnectableLayer* add = net->AddAdditionLayer("add");
309  IConnectableLayer* output = net->AddOutputLayer(0, "output");
310 
311  input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
312  input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
313  input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
314  sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
315  add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
316 
317  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
318 
319  input0->GetOutputSlot(0).SetTensorInfo(info);
320  input1->GetOutputSlot(0).SetTensorInfo(info);
321  input2->GetOutputSlot(0).SetTensorInfo(info);
322  sub->GetOutputSlot(0).SetTensorInfo(info);
323  add->GetOutputSlot(0).SetTensorInfo(info);
324 
325  // optimize the network
326  std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
327  OptimizerOptions optOptions;
328  optOptions.m_ImportEnabled = true;
329  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
330 
331  Graph& graph = GetGraphForTesting(optNet.get());
332 
333  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
334  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
335  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
336  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
337  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
338  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
339  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
340 
341  // Checks order is valid.
342  CHECK(CheckOrder(graph, layer0, layer1));
343  CHECK(CheckOrder(graph, layer1, layer2));
344  CHECK(CheckOrder(graph, layer2, layer3));
345  CHECK(CheckOrder(graph, layer3, layer4));
346  CHECK(CheckOrder(graph, layer4, layer5));
347  CHECK(CheckOrder(graph, layer5, layer6));
348 
349  // Load it into the runtime. It should pass.
350  NetworkId netId;
351  std::string ignoredErrorMessage;
352 
353  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); // assumed: import-capable Malloc memory sources
354  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
355 
356  // Creates structures for input & output
357  std::vector<float> inputData0
358  {
359  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
360  };
361  std::vector<float> inputData1
362  {
363  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
364  };
365  std::vector<float> inputData2
366  {
367  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
368  };
369 
370  std::vector<float> outputData(12);
371 
372  std::vector<float> expectedOutput
373  {
374  13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
375  };
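 // Expected values: add = input2 + (input0 - input1), computed element-wise.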
376 
377  InputTensors inputTensors
378  {
379  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
380  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
381  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
382  };
383  OutputTensors outputTensors
384  {
385  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
386  };
387 
388  runtime->GetProfiler(netId)->EnableProfiling(true);
389 
390  // Do the inference
391  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
392 
393  // Retrieve the Profiler.Print() output to get the workload execution
394  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
395  std::stringstream ss;
396  profilerManager.GetProfiler()->Print(ss);
397  std::string dump = ss.str();
398 
399  // Contains ImportMemGeneric
400  std::size_t found = dump.find("ImportMemGeneric");
401  CHECK(found != std::string::npos);
402 
403  // Contains SyncMemGeneric
404  found = dump.find("SyncMemGeneric");
405  CHECK(found != std::string::npos);
406 
407  // Does not contain CopyMemGeneric
408  found = dump.find("CopyMemGeneric");
409  CHECK(found == std::string::npos);
410 
411  // Use memory import between backends
412  CHECK((layer4->GetType() == LayerType::MemImport));
413 
414  // Check output is as expected
415  CHECK(outputData == expectedOutput);
416 }
417 
418 TEST_CASE("FallbackPaddingCopyFromCpuAcc")
419 {
420  using namespace armnn;
421 
422  // Create a mock backend object
423  MockImportBackendInitialiser initialiser; // Register the Mock Backend
424  auto backendObjPtr = CreateBackendObject(MockImportBackendId());
425  CHECK((backendObjPtr != nullptr));
426 
427  BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
428  if (backendIds.find("MockRef") == backendIds.end())
429  {
430  std::string message = "Cannot load MockRef";
431  FAIL(message);
432  }
433 
434  // Create runtime in which test will run and allow fallback to CpuAcc.
435  IRuntime::CreationOptions options;
436  IRuntimePtr runtime(IRuntime::Create(options));
437 
438  // Builds up the structure of the network.
439  INetworkPtr net(INetwork::Create());
440 
441  Pooling2dDescriptor desc;
442 
443  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
444  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
445  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
446  IConnectableLayer* add = net->AddAdditionLayer("add");
447  IConnectableLayer* output = net->AddOutputLayer(0, "output");
448 
449  input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
450  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
451  pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
452  add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
453 
454  TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
455  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
456 
457  input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
458  input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
459  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
460  add->GetOutputSlot(0).SetTensorInfo(poolingInfo);
461 
462  // optimize the network
463  std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
464  OptimizerOptions optOptions;
465  optOptions.m_ImportEnabled = true;
466  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
467 
468  Graph& graph = GetGraphForTesting(optNet.get());
469 
470  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
471  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
472  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
473  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
474  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
475  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
476 
477  // Checks order is valid.
478  CHECK(CheckOrder(graph, layer0, layer1));
479  CHECK(CheckOrder(graph, layer1, layer2));
480  CHECK(CheckOrder(graph, layer2, layer3));
481  CHECK(CheckOrder(graph, layer3, layer4));
482  CHECK(CheckOrder(graph, layer4, layer5));
483 
484  // Load it into the runtime. It should pass.
485  NetworkId netId;
486  std::string ignoredErrorMessage;
487  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); // assumed: import-capable Malloc memory sources
488 
489  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
490 
491  // Creates structures for input & output
492  std::vector<float> inputData0
493  {
494  1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
495  };
496  std::vector<float> inputData1
497  {
498  -1.0f, 3.0f
499  };
500 
501  std::vector<float> outputData(2);
502 
503  std::vector<float> expectedOutput
504  {
505  5.0f, 15.0f
506  };
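 // Expected values: pooling reduces each 3x2 channel of input0 to a single value ({ 6, 12 } here, the
 // per-channel maximum), and adding input1 { -1, 3 } gives { 5, 15 }.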
507 
508  InputTensors inputTensors
509  {
510  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
511  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
512  };
513  OutputTensors outputTensors
514  {
515  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
516  };
517 
518  runtime->GetProfiler(netId)->EnableProfiling(true);
519 
520  // Do the inference
521  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
522 
523  // Retrieve the Profiler.Print() output to get the workload execution
524  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
525  std::stringstream ss;
526  profilerManager.GetProfiler()->Print(ss);
527  std::string dump = ss.str();
528 
529  // Contains CopyMemGeneric between the backends
530  std::size_t found = dump.find("CopyMemGeneric");
531  CHECK(found != std::string::npos);
532 
533  // Contains SyncMemGeneric for the output
534  found = dump.find("SyncMemGeneric");
535  CHECK(found != std::string::npos);
536 
537  // Does not contain ImportMemGeneric
538  found = dump.find("ImportMemGeneric");
539  CHECK(found == std::string::npos);
540 
541  // Use memory copy between backends
542  CHECK((layer3->GetType() == LayerType::MemCopy));
543 
544  // Check output is as expected
545  CHECK(outputData == expectedOutput);
546 }
547 
548 TEST_CASE("FallbackDisableImportFromCpuAcc")
549 {
550  using namespace armnn;
551 
552  // Create a mock backend object
553  MockImportBackendInitialiser initialiser; // Register the Mock Backend
554  auto backendObjPtr = CreateBackendObject(MockImportBackendId());
555  CHECK((backendObjPtr != nullptr));
556 
557  BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
558  if (backendIds.find("MockRef") == backendIds.end())
559  {
560  std::string message = "Cannot load MockRef";
561  FAIL(message);
562  }
563 
564  // Create runtime in which test will run and allow fallback to CpuAcc.
565  IRuntime::CreationOptions options;
566  IRuntimePtr runtime(IRuntime::Create(options));
567 
568  // Builds up the structure of the network.
569  INetworkPtr net(INetwork::Create());
570 
571  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
572  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
573  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
574  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
575  IConnectableLayer* add = net->AddAdditionLayer("add");
576  IConnectableLayer* output = net->AddOutputLayer(0, "output");
577 
578  input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
579  input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
580  input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
581  sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
582  add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
583 
584  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
585 
586  input0->GetOutputSlot(0).SetTensorInfo(info);
587  input1->GetOutputSlot(0).SetTensorInfo(info);
588  input2->GetOutputSlot(0).SetTensorInfo(info);
589  sub->GetOutputSlot(0).SetTensorInfo(info);
590  add->GetOutputSlot(0).SetTensorInfo(info);
591 
592  // optimize the network
593  std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
594  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
595 
596  Graph& graph = GetGraphForTesting(optNet.get());
597 
598  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
599  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
600  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
601  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
602  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
603  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
604  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
605 
606  // Checks order is valid.
607  CHECK(CheckOrder(graph, layer0, layer1));
608  CHECK(CheckOrder(graph, layer1, layer2));
609  CHECK(CheckOrder(graph, layer2, layer3));
610  CHECK(CheckOrder(graph, layer3, layer4));
611  CHECK(CheckOrder(graph, layer4, layer5));
612  CHECK(CheckOrder(graph, layer5, layer6));
613 
614  // Load it into the runtime. It should pass.
615  NetworkId netId;
616  std::string ignoredErrorMessage;
617  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined); // assumed: import disabled, so Undefined memory sources
618 
619  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
620 
621  // Creates structures for input & output
622  std::vector<float> inputData0
623  {
624  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
625  };
626  std::vector<float> inputData1
627  {
628  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
629  };
630  std::vector<float> inputData2
631  {
632  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
633  };
634 
635  std::vector<float> outputData(12);
636 
637  std::vector<float> expectedOutput
638  {
639  13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
640  };
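 // Expected values: as in the import-enabled variant above, add = input2 + (input0 - input1) element-wise.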
641 
642  InputTensors inputTensors
643  {
644  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
645  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
646  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
647  };
648  OutputTensors outputTensors
649  {
650  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
651  };
652 
653  runtime->GetProfiler(netId)->EnableProfiling(true);
654 
655  // Do the inference
656  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
657 
658  // Retrieve the Profiler.Print() output to get the workload execution
659  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
660  std::stringstream ss;
661  profilerManager.GetProfiler()->Print(ss);
662  std::string dump = ss.str();
663 
664  // Contains CopyMemGeneric between the backends
665  std::size_t found = dump.find("CopyMemGeneric");
666  CHECK(found != std::string::npos);
667 
668  // Does not contain ImportMemGeneric
669  found = dump.find("ImportMemGeneric");
670  CHECK(found == std::string::npos);
671 
672  // Use memory copy between backends
673  CHECK((layer4->GetType() == LayerType::MemCopy));
674 
675  // Check output is as expected
676  CHECK(outputData == expectedOutput);
677 }
678 
679 #if defined(ARMCOMPUTECL_ENABLED)
680 TEST_CASE("NeonImportEnabledFallbackToCl")
681 {
682  using namespace armnn;
683 
684  IRuntime::CreationOptions options;
685  IRuntimePtr runtime(IRuntime::Create(options));
686 
687  // Builds up the structure of the network.
688  INetworkPtr net(INetwork::Create());
689 
690  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
691  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
692  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
693  IConnectableLayer* add = net->AddAdditionLayer("add");
694  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
695  IConnectableLayer* output = net->AddOutputLayer(0, "output");
696 
697  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
698  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
699  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
700  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
701  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
702 
703  TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
704 
705  input0->GetOutputSlot(0).SetTensorInfo(info);
706  input1->GetOutputSlot(0).SetTensorInfo(info);
707  input2->GetOutputSlot(0).SetTensorInfo(info);
708  add->GetOutputSlot(0).SetTensorInfo(info);
709  sub->GetOutputSlot(0).SetTensorInfo(info);
710 
711  std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
712  // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
713  sub->BackendSelectionHint(backends[1]);
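 // The hint only expresses a preference: the optimizer places the subtraction on GpuAcc while the rest of
 // the network stays on CpuAcc, which creates the cross-backend boundary exercised by this test.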
714 
715  // optimize the network
716  OptimizerOptions optOptions;
717  optOptions.m_ImportEnabled = true;
718  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
719 
720  Graph& graph = GetGraphForTesting(optNet.get());
721 
722  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
723  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
724  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
725  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
726  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
727  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
728  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
729 
730  // Checks order is valid.
731  CHECK(CheckOrder(graph, layer0, layer1));
732  CHECK(CheckOrder(graph, layer1, layer2));
733  CHECK(CheckOrder(graph, layer2, layer3));
734  CHECK(CheckOrder(graph, layer3, layer4));
735  CHECK(CheckOrder(graph, layer4, layer5));
736  CHECK(CheckOrder(graph, layer5, layer6));
737 
738  // Use memory copy between backends
739  CHECK((layer4->GetType() == LayerType::MemCopy));
740 
741  // Correctly use backend hint
742  CHECK((layer5->GetBackendId() == Compute::GpuAcc ));
743 
744  // Load it into the runtime. It should pass.
745  NetworkId netId;
746  std::string ignoredErrorMessage;
747 
748  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); // assumed: import-capable Malloc memory sources
749 
750  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
751 
752  // Creates structures for input & output
753  std::vector<float> inputData0
754  {
755  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
756  };
757  std::vector<float> inputData1
758  {
759  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
760  };
761  std::vector<float> inputData2
762  {
763  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
764  };
765 
766  std::vector<float> outputData(16);
767 
768  std::vector<float> expectedOutput
769  {
770  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
771  };
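 // Expected values: sub = input2 - (input0 + input1), computed element-wise over the 16 elements.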
772 
773  // Creates structures for input & output
774  unsigned int numElements = info.GetNumElements();
775  size_t totalBytes = numElements * sizeof(float);
776 
777  // Prepare aligned data
778  const size_t alignment = 64;
779  size_t space = totalBytes + alignment + alignment;
780  auto inputData = std::make_unique<uint8_t[]>(space);
781  void* alignedInputPtr = inputData.get();
782  CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
783 
784  auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
785  std::copy(inputData2.begin(), inputData2.end(), inputPtr);
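 // Only input 2 is fed from this manually 64-byte-aligned buffer; with import enabled, the alignment is
 // assumed to make the pointer suitable for direct import by the backend rather than being copied.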
786 
787  InputTensors inputTensors
788  {
789  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
790  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
791  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) }
792  };
793  OutputTensors outputTensors
794  {
795  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
796  };
797 
798  runtime->GetProfiler(netId)->EnableProfiling(true);
799 
800  // Do the inference
801  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
802 
803  // Retrieve the Profiler.Print() output to get the workload execution
804  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
805  std::stringstream ss;
806  profilerManager.GetProfiler()->Print(ss);
807  std::string dump = ss.str();
808 
809  // Executed Subtraction using GpuAcc
810  std::size_t found = dump.find("ClSubtractionWorkload_Execute");
811  CHECK(found != std::string::npos);
812 
813  // Contains CopyMemGeneric
814  found = dump.find("CopyMemGeneric");
815  CHECK(found != std::string::npos);
816 
817  // Check output is as expected
818  for(unsigned int i = 0; i < numElements; ++i)
819  {
820  CHECK(outputData[i] == expectedOutput[i]);
821  }
822  runtime->UnloadNetwork(netId);
823 }
824 
825 TEST_CASE("NeonImportDisabledFallbackToCl")
826 {
827  using namespace armnn;
828 
829  IRuntime::CreationOptions options;
830  IRuntimePtr runtime(IRuntime::Create(options));
831 
832  // Builds up the structure of the network.
833  INetworkPtr net(INetwork::Create());
834 
835  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
836  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
837  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
838  IConnectableLayer* add = net->AddAdditionLayer("add");
839  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
840  IConnectableLayer* output = net->AddOutputLayer(0, "output");
841 
842  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
843  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
844  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
845  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
846  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
847 
848  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
849 
850  input0->GetOutputSlot(0).SetTensorInfo(info);
851  input1->GetOutputSlot(0).SetTensorInfo(info);
852  input2->GetOutputSlot(0).SetTensorInfo(info);
853  add->GetOutputSlot(0).SetTensorInfo(info);
854  sub->GetOutputSlot(0).SetTensorInfo(info);
855 
856  std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
857  // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
858  sub->BackendSelectionHint(backends[1]);
859 
860  // optimize the network
861  OptimizerOptions optOptions;
862  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
863 
864  Graph& graph = GetGraphForTesting(optNet.get());
865 
866  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
867  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
868  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
869  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
870  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
871  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
872  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
873 
874  // Checks order is valid.
875  CHECK(CheckOrder(graph, layer0, layer1));
876  CHECK(CheckOrder(graph, layer1, layer2));
877  CHECK(CheckOrder(graph, layer2, layer3));
878  CHECK(CheckOrder(graph, layer3, layer4));
879  CHECK(CheckOrder(graph, layer4, layer5));
880  CHECK(CheckOrder(graph, layer5, layer6));
881 
882  // Use memory copy between backends
883  CHECK((layer4->GetType() == LayerType::MemCopy));
884 
885  // Correctly use backend hint
886  CHECK((layer5->GetBackendId() == Compute::GpuAcc ));
887 
888  // Load it into the runtime. It should pass.
889  NetworkId netId;
890  runtime->LoadNetwork(netId, std::move(optNet));
891 
892  // Creates structures for input & output
893  std::vector<float> inputData0
894  {
895  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
896  };
897  std::vector<float> inputData1
898  {
899  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
900  };
901  std::vector<float> inputData2
902  {
903  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
904  };
905 
906  std::vector<float> outputData(12);
907 
908  std::vector<float> expectedOutput
909  {
910  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
911  };
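 // Expected values: sub = input2 - (input0 + input1), element-wise (same data as the import-enabled test).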
912 
913  InputTensors inputTensors
914  {
915  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
916  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
917  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
918  };
919  OutputTensors outputTensors
920  {
921  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
922  };
923 
924  runtime->GetProfiler(netId)->EnableProfiling(true);
925 
926  // Do the inference
927  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
928 
929  // Retrieve the Profiler.Print() output to get the workload execution
930  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
931  std::stringstream ss;
932  profilerManager.GetProfiler()->Print(ss);
933  std::string dump = ss.str();
934 
935  // Executed Subtraction using GpuAcc
936  std::size_t found = dump.find("ClSubtractionWorkload_Execute");
937  CHECK(found != std::string::npos);
938 
939  // Contains CopyMemGeneric
940  found = dump.find("CopyMemGeneric");
941  CHECK(found != std::string::npos);
942 
943  // Check output is as expected
944  CHECK(outputData == expectedOutput);
945 }
946 
947 TEST_CASE("NeonImportEnabledFallbackSubgraphToCl")
948 {
949  using namespace armnn;
950 
951  IRuntime::CreationOptions options;
952  IRuntimePtr runtime(IRuntime::Create(options));
953 
954  // Builds up the structure of the network.
955  INetworkPtr net(INetwork::Create());
956 
957  Pooling2dDescriptor desc;
958  desc.m_PoolWidth = 2;
959  desc.m_PoolHeight = 2;
960  desc.m_StrideX = 2;
961  desc.m_StrideY = 2;
962 
963  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
964  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
965  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
966  IConnectableLayer* add = net->AddAdditionLayer("add");
967  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
968  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
969  IConnectableLayer* output = net->AddOutputLayer(0, "output");
970 
971  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
972  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
973  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
974  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
975  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
976  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
977 
978  TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
979  TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
980 
981  input0->GetOutputSlot(0).SetTensorInfo(info);
982  input1->GetOutputSlot(0).SetTensorInfo(info);
983  input2->GetOutputSlot(0).SetTensorInfo(info);
984  add->GetOutputSlot(0).SetTensorInfo(info);
985  sub->GetOutputSlot(0).SetTensorInfo(info);
986  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
987 
988  std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
989  // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
990  sub->BackendSelectionHint(backends[1]);
991 
992  // optimize the network
993  OptimizerOptions optOptions;
994  optOptions.m_ImportEnabled = true;
995  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
996 
997  Graph& graph = GetGraphForTesting(optNet.get());
998 
999  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
1000  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
1001  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
1002  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
1003  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
1004  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
1005  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
1006  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
1007  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
1008 
1009  // Checks order is valid.
1010  CHECK(CheckOrder(graph, layer0, layer1));
1011  CHECK(CheckOrder(graph, layer1, layer2));
1012  CHECK(CheckOrder(graph, layer2, layer3));
1013  CHECK(CheckOrder(graph, layer3, layer4));
1014  CHECK(CheckOrder(graph, layer4, layer5));
1015  CHECK(CheckOrder(graph, layer5, layer6));
1016  CHECK(CheckOrder(graph, layer6, layer7));
1017  CHECK(CheckOrder(graph, layer7, layer8));
1018 
1019  // Use memory copy between backends
1020  CHECK((layer4->GetType() == LayerType::MemCopy));
1021  CHECK((layer6->GetType() == LayerType::MemCopy));
1022 
1023  // Correctly use backend hint
1024  CHECK((layer5->GetBackendId() == Compute::GpuAcc ));
1025 
1026  // Load it into the runtime. It should pass.
1027  NetworkId netId;
1028  std::string ignoredErrorMessage;
1029 
1030  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); // assumed: import-capable Malloc memory sources
1031 
1032  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1033 
1034  // Creates structures for input & output
1035  std::vector<float> inputData0
1036  {
1037  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
1038  };
1039  std::vector<float> inputData1
1040  {
1041  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
1042  };
1043  std::vector<float> inputData2
1044  {
1045  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
1046  };
1047 
1048  std::vector<float> outputData(4);
1049 
1050  std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
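 // Expected values: sub = input2 - (input0 + input1) over the 1x2x4x2 tensor, then 2x2 stride-2 max pooling
 // per channel gives { 11, 3, -5, 11 }.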
1051 
1052  // Prepare aligned data
1053  unsigned int numElements = info.GetNumElements();
1054  size_t totalBytes = numElements * sizeof(float);
1055  const size_t alignment = 64;
1056  size_t space = totalBytes + alignment + alignment;
1057  auto inputData = std::make_unique<uint8_t[]>(space);
1058  void* alignedInputPtr = inputData.get();
1059  CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
1060 
1061  auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
1062  std::copy(inputData2.begin(), inputData2.end(), inputPtr);
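 // As in the earlier GpuAcc fallback test, input 2 is backed by the aligned buffer so that it can be
 // imported directly when import is enabled.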
1063 
1064  InputTensors inputTensors
1065  {
1066  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
1067  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
1068  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) }
1069  };
1070  OutputTensors outputTensors
1071  {
1072  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1073  };
1074 
1075  runtime->GetProfiler(netId)->EnableProfiling(true);
1076 
1077  // Do the inference
1078  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1079 
1080  // Retrieve the Profiler.Print() output to get the workload execution
1081  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
1082  std::stringstream ss;
1083  profilerManager.GetProfiler()->Print(ss);
1084  std::string dump = ss.str();
1085 
1086  // Executed Subtraction using GpuAcc
1087  std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1088  CHECK(found != std::string::npos);
1089 
1090  // Correctly switch back to CpuAcc
1091  found = dump.find("NeonPooling2dWorkload_Execute");
1092  CHECK(found != std::string::npos);
1093 
1094  // Contains CopyMemGeneric
1095  found = dump.find("CopyMemGeneric");
1096  CHECK(found != std::string::npos);
1097 
1098  // Contains SyncMemGeneric for output
1099  found = dump.find("SyncMemGeneric");
1100  CHECK(found != std::string::npos);
1101 
1102  // Check output is as expected
1103  CHECK(outputData == expectedOutput);
1104  runtime->UnloadNetwork(netId);
1105 }
1106 
1107 TEST_CASE("NeonImportDisableFallbackSubgraphToCl")
1108 {
1109  using namespace armnn;
1110 
1111  IRuntime::CreationOptions options;
1112  IRuntimePtr runtime(IRuntime::Create(options));
1113 
1114  // Builds up the structure of the network.
1115  INetworkPtr net(INetwork::Create());
1116 
1117  Pooling2dDescriptor desc;
1118 
1119  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
1120  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
1121  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
1122  IConnectableLayer* add = net->AddAdditionLayer("add");
1123  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
1124  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
1125  IConnectableLayer* output = net->AddOutputLayer(0, "output");
1126 
1127  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
1128  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
1129  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
1130  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
1131  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
1132  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1133 
1134  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
1135  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
1136 
1137  input0->GetOutputSlot(0).SetTensorInfo(info);
1138  input1->GetOutputSlot(0).SetTensorInfo(info);
1139  input2->GetOutputSlot(0).SetTensorInfo(info);
1140  add->GetOutputSlot(0).SetTensorInfo(info);
1141  sub->GetOutputSlot(0).SetTensorInfo(info);
1142  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
1143 
1144  std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
1145  // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
1146  sub->BackendSelectionHint(backends[1]);
1147 
1148  // optimize the network
1149  OptimizerOptions optOptions;
1150  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
1151 
1152  Graph& graph = GetGraphForTesting(optNet.get());
1153 
1154  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
1155  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
1156  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
1157  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
1158  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
1159  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
1160  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
1161  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
1162  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
1163 
1164  // Checks order is valid.
1165  CHECK(CheckOrder(graph, layer0, layer1));
1166  CHECK(CheckOrder(graph, layer1, layer2));
1167  CHECK(CheckOrder(graph, layer2, layer3));
1168  CHECK(CheckOrder(graph, layer3, layer4));
1169  CHECK(CheckOrder(graph, layer4, layer5));
1170  CHECK(CheckOrder(graph, layer5, layer6));
1171  CHECK(CheckOrder(graph, layer6, layer7));
1172  CHECK(CheckOrder(graph, layer7, layer8));
1173 
1174  // Use memory copy between backends
1175  CHECK((layer4->GetType() == LayerType::MemCopy));
1176  CHECK((layer6->GetType() == LayerType::MemCopy));
1177 
1178  // Correctly use backend hint
1179  CHECK((layer5->GetBackendId() == Compute::GpuAcc ));
1180 
1181  // Load it into the runtime. It should pass.
1182  NetworkId netId;
1183  runtime->LoadNetwork(netId, std::move(optNet));
1184 
1185  // Creates structures for input & output
1186  std::vector<float> inputData0
1187  {
1188  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
1189  };
1190  std::vector<float> inputData1
1191  {
1192  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
1193  };
1194  std::vector<float> inputData2
1195  {
1196  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
1197  };
1198 
1199  std::vector<float> outputData(2);
1200 
1201  std::vector<float> expectedOutput{ 11.0f, -1.0f };
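 // Expected values: sub = input2 - (input0 + input1), then pooling reduces each 3x2 channel to a single
 // value, giving { 11, -1 } (the per-channel maximum for this data).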
1202 
1203  InputTensors inputTensors
1204  {
1205  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
1206  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
1207  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
1208  };
1209  OutputTensors outputTensors
1210  {
1211  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1212  };
1213 
1214  runtime->GetProfiler(netId)->EnableProfiling(true);
1215 
1216  // Do the inference
1217  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1218 
1219  // Retrieve the Profiler.Print() output to get the workload execution
1220  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
1221  std::stringstream ss;
1222  profilerManager.GetProfiler()->Print(ss);
1223  std::string dump = ss.str();
1224 
1225  // Executed Subtraction using GpuAcc
1226  std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1227  CHECK(found != std::string::npos);
1228 
1229  // Correctly switch back to CpuAcc
1230  found = dump.find("NeonPooling2dWorkload_Execute");
1231  CHECK(found != std::string::npos);
1232 
1233  // Contains CopyMemGeneric
1234  found = dump.find("CopyMemGeneric");
1235  CHECK(found != std::string::npos);
1236 
1237  // Check output is as expected
1238  CHECK(outputData == expectedOutput);
1239 }
1240 #endif
1241 
1242 }