NeonFallbackTests.cpp
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <CommonTestUtils.hpp>
#include <backendsCommon/test/mockBackend/MockImportBackend.hpp> // MockImportBackendInitialiser / MockImportBackendId

#include <GraphUtils.hpp>

#include <doctest/doctest.h>

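// These tests exercise cross-backend fallback around CpuAcc (Neon): each graph
// is split between two backends, and the optimizer must join them with the
// right boundary layer (MemImport when zero-copy import is possible, MemCopy
// when it is not). The choice is verified both in the optimized graph and in
// the profiler dump of the executed workloads.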
TEST_SUITE("NeonFallback")
{
TEST_CASE("FallbackImportToCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
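    // The network computes output = input2 - (input0 + input1); "add" is
    // expected to stay on MockRef while "sub" falls back to CpuAcc.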

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

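    // With import/export enabled, the optimizer is expected to bridge the
    // MockRef -> CpuAcc boundary with a MemImport layer whose auto-generated
    // name encodes the connection it replaces.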
    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };
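    // Element-wise: expectedOutput[i] = inputData2[i] - (inputData0[i] + inputData1[i]),
    // e.g. 12 - (1 + 0) = 11 for the first element.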

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    // Uses memory import between backends
    CHECK((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

TEST_CASE("FallbackPaddingCopyToCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
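    // Pooling on CpuAcc uses padded ACL tensors, so the tensor crossing the
    // backend boundary cannot be imported directly; a MemCopy layer is
    // expected instead of a MemImport.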

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        6.0f, 12.0f
    };
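    // The max pooling reduces each channel of the add result to its maximum:
    // max{1..6} = 6 for channel 0 and max{7..12} = 12 for channel 1.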

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    CHECK(found == std::string::npos);

    // Uses a memory copy between backends
    CHECK((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

TEST_CASE("FallbackImportFromCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
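    // Here the roles are reversed: "sub" runs on CpuAcc first and its result
    // is imported into "add" on MockRef, i.e. output = input2 + (input0 - input1).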

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    // Uses memory import between backends
    CHECK((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

TEST_CASE("FallbackPaddingCopyFromCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
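    // output = pooling(input0) + input1; the pooled CpuAcc result crosses back
    // to MockRef, and the ACL padding again forces a copy rather than an import.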

    TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
    input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    add->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
    };
    std::vector<float> inputData1
    {
        -1.0f, 3.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        5.0f, 15.0f
    };
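    // Per-channel maxima of input0 are 6 and 12; adding input1 = { -1, 3 }
    // gives { 5, 15 }.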

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    CHECK(found == std::string::npos);

    // Uses a memory copy between backends
    CHECK((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

TEST_CASE("FallbackDisableImportFromCpuAcc")
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    CHECK((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
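    // Import/export are left at their defaults (disabled), so the optimizer
    // is expected to join the two backends with a MemCopy layer.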

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    CHECK(found == std::string::npos);

    // Uses a memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

#if defined(ARMCOMPUTECL_ENABLED)
TEST_CASE("NeonImportEnabledFallbackToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);
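    // CpuAcc supports subtraction itself, so without the hint the whole graph
    // would stay on CpuAcc; the hint forces a CpuAcc <-> GpuAcc boundary.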

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Uses a memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
    };

    std::vector<float> outputData(16);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
    };

    unsigned int numElements = info.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);

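    // CL (and Neon) can only import a buffer whose base address satisfies the
    // device's alignment requirement, so carve a 64-byte-aligned window out of
    // an over-sized allocation.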
    // Prepare aligned data
    const size_t alignment = 64;
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr = inputData.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));

    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    std::copy(inputData2.begin(), inputData2.end(), inputPtr);

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    for (unsigned int i = 0; i < numElements; ++i)
    {
        CHECK(outputData[i] == expectedOutput[i]);
    }
    runtime->UnloadNetwork(netId);
}

TEST_CASE("NeonImportDisabledFallbackToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Uses a memory copy between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));
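    // No INetworkProperties are passed here, so the runtime falls back to its
    // defaults (no import) and the boundary MemCopy layers are exercised.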

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
    armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
    armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
    inputTensorInfo0.SetConstant(true);
    inputTensorInfo1.SetConstant(true);
    inputTensorInfo2.SetConstant(true);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
        { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
        { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

TEST_CASE("NeonImportEnabledFallbackSubgraphToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;
    desc.m_PoolWidth = 2;
    desc.m_PoolHeight = 2;
    desc.m_StrideX = 2;
    desc.m_StrideY = 2;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));
    CHECK(CheckOrder(graph, layer6, layer7));
    CHECK(CheckOrder(graph, layer7, layer8));

    // Uses memory copies into and out of the GpuAcc sub-graph
    CHECK((layer4->GetType() == LayerType::MemCopy));
    CHECK((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
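    // sub output = input2 - (input0 + input1) =
    // { 11, 9, 7, 5, 3, 1, -1, -3, -5, -7, -9, -11, 11, 9, 7, 5 } in NCHW
    // { 1, 2, 4, 2 }; 2x2 max pooling with stride 2 then yields { 11, 3, -5, 11 }.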
1102 
1103  // Prepare aligned data
1104  unsigned int numElements = info.GetNumElements();
1105  size_t totalBytes = numElements * sizeof(float);
1106  const size_t alignment = 64;
1107  size_t space = totalBytes + alignment + alignment;
1108  auto inputData = std::make_unique<uint8_t[]>(space);
1109  void* alignedInputPtr = inputData.get();
1110  CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
1111 
1112  auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
1113  std::copy(inputData2.begin(), inputData2.end(), intputPtr);
1114 
1115  armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
1116  armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
1117  armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
1118  inputTensorInfo0.SetConstant(true);
1119  inputTensorInfo1.SetConstant(true);
1120  inputTensorInfo2.SetConstant(true);
1121 
1122  InputTensors inputTensors
1123  {
1124  { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
1125  { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
1126  { 2, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr) }
1127  };
1128  OutputTensors outputTensors
1129  {
1130  { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1131  };
1132 
1133  runtime->GetProfiler(netId)->EnableProfiling(true);
1134 
1135  // Do the inference
1136  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1137 
1138  // Retrieve the Profiler.Print() output to get the workload execution
1140  std::stringstream ss;
1141  profilerManager.GetProfiler()->Print(ss);;
1142  std::string dump = ss.str();
1143 
1144  // Executed Subtraction using GpuAcc
1145  std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1146  CHECK(found != std::string::npos);
1147 
1148  // Correctly switch back to CpuAcc
1149  found = dump.find("NeonPooling2dWorkload_Execute");
1150  CHECK(found != std::string::npos);
1151 
1152  // Contain CopyMemGeneric
1153  found = dump.find("CopyMemGeneric");
1154  CHECK(found != std::string::npos);
1155 
1156  // Contains SyncMemGeneric for output
1157  found = dump.find("SyncMemGeneric");
1158  CHECK(found != std::string::npos);
1159 
1160  // Check output is as expected
1161  CHECK(outputData == expectedOutput);
1162  runtime->UnloadNetwork(netId);
1163 }

TEST_CASE("NeonImportDisableFallbackSubgraphToCl")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));
    CHECK(CheckOrder(graph, layer6, layer7));
    CHECK(CheckOrder(graph, layer7, layer8));

    // Uses memory copies into and out of the GpuAcc sub-graph
    CHECK((layer4->GetType() == LayerType::MemCopy));
    CHECK((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };
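    // sub output = input2 - (input0 + input1) = { 11, 9, ..., -11 }; the
    // default max pooling reduces each channel to its maximum: 11 and -1.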
1260 
1261  armnn::TensorInfo inputTensorInfo0 = runtime->GetInputTensorInfo(netId, 0);
1262  armnn::TensorInfo inputTensorInfo1 = runtime->GetInputTensorInfo(netId, 1);
1263  armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 2);
1264  inputTensorInfo0.SetConstant(true);
1265  inputTensorInfo1.SetConstant(true);
1266  inputTensorInfo2.SetConstant(true);
1267 
1268  InputTensors inputTensors
1269  {
1270  { 0, armnn::ConstTensor(inputTensorInfo0, inputData0.data()) },
1271  { 1, armnn::ConstTensor(inputTensorInfo1, inputData1.data()) },
1272  { 2, armnn::ConstTensor(inputTensorInfo2, inputData2.data()) }
1273  };
1274  OutputTensors outputTensors
1275  {
1276  { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1277  };
1278 
1279  runtime->GetProfiler(netId)->EnableProfiling(true);
1280 
1281  // Do the inference
1282  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1283 
1284  // Retrieve the Profiler.Print() output to get the workload execution
1286  std::stringstream ss;
1287  profilerManager.GetProfiler()->Print(ss);;
1288  std::string dump = ss.str();
1289 
1290  // Executed Subtraction using GpuAcc
1291  std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1292  CHECK(found != std::string::npos);
1293 
1294  // Correctly switch back to CpuAcc
1295  found = dump.find("NeonPooling2dWorkload_Execute");
1296  CHECK(found != std::string::npos);
1297 
1298  // Contain CopyMemGeneric
1299  found = dump.find("CopyMemGeneric");
1300  CHECK(found != std::string::npos);
1301 
1302  // Check output is as expected
1303  CHECK(outputData == expectedOutput);
1304 }
1305 #endif
1306 
1307 }