ArmNN
 22.08
ClFallbackTests.cpp File Reference
#include <CommonTestUtils.hpp>
#include <GraphUtils.hpp>
#include <doctest/doctest.h>


Functions

 TEST_SUITE ("ClFallback")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "ClFallback" )

Definition at line 12 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), TensorInfo::GetNumElements(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ExportEnabled, OptimizerOptions::m_ImportEnabled, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, armnn::Malloc, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), TensorInfo::SetConstant(), and IOutputSlot::SetTensorInfo().
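Each TEST_CASE below can be run on its own with doctest's test-suite filter (for example, --test-suite=ClFallback on ArmNN's UnitTests binary, assuming it was built with both the GpuAcc and CpuAcc backends).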

{
TEST_CASE("ClImportEnabledFallbackToNeon")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
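    // The network computes output = input2 - (input0 + input1).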

    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
    info.SetConstant(true);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
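    // Importing/exporting lets the runtime use the caller's (suitably aligned) buffers directly instead of copying.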
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Use memory import between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputValue0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
    };
    std::vector<float> inputValue1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
    };

    std::vector<float> outputData(16);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
    };
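    // Element-wise: expectedOutput = inputData2 - (inputValue0 + inputValue1), e.g. 12 - (1 + 0) = 11.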

    // Prepare aligned data
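    // Cl buffer import expects suitably aligned memory, so over-allocate and align the pointers to a 64-byte boundary.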
    unsigned int numElements = info.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);
    const size_t alignment = 64;
    size_t space = totalBytes + alignment + alignment;
    auto inputData0 = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr0 = inputData0.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));

    auto* inputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
    std::copy(inputValue0.begin(), inputValue0.end(), inputPtr0);

    auto inputData1 = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr1 = inputData1.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));

    auto* inputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
    std::copy(inputValue1.begin(), inputValue1.end(), inputPtr1);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using CpuAcc
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);

    runtime->UnloadNetwork(netId);
}

TEST_CASE("ClImportDisabledFallbackToNeon")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    info.SetConstant(true);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));

    // Use memory import between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));
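    // With import disabled, the runtime copies data between the caller's buffers and backend-owned memory.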

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using CpuAcc
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

TEST_CASE("ClImportEnabledFallbackSubgraphToNeon")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;
    desc.m_PoolWidth = 2;
    desc.m_PoolHeight = 2;
    desc.m_StrideX = 2;
    desc.m_StrideY = 2;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
    info.SetConstant(true);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));
    CHECK(CheckOrder(graph, layer6, layer7));
    CHECK(CheckOrder(graph, layer7, layer8));

    // Use memory import between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));
    CHECK((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputValue0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
    };
    std::vector<float> inputValue1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
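    // Max pooling (the descriptor's default pool type) over 2x2 / stride-2 windows of the sub result gives one value per window.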

    unsigned int numElements = info.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);
    const size_t alignment = 64;
    size_t space = totalBytes + alignment + alignment;
    auto inputData0 = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr0 = inputData0.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));

    auto* inputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
    std::copy(inputValue0.begin(), inputValue0.end(), inputPtr0);

    auto inputData1 = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr1 = inputData1.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));

    auto* inputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
    std::copy(inputValue1.begin(), inputValue1.end(), inputPtr1);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using CpuAcc
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Correctly switch back to GpuAcc
    found = dump.find("ClPooling2dWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);

    runtime->UnloadNetwork(netId);
}

TEST_CASE("ClImportDisableFallbackSubgraphToNeon")
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;
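    // The descriptor is left at its defaults here; Pooling2dDescriptor defaults to Max pooling.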

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    info.SetConstant(true);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    CHECK(CheckOrder(graph, layer0, layer1));
    CHECK(CheckOrder(graph, layer1, layer2));
    CHECK(CheckOrder(graph, layer2, layer3));
    CHECK(CheckOrder(graph, layer3, layer4));
    CHECK(CheckOrder(graph, layer4, layer5));
    CHECK(CheckOrder(graph, layer5, layer6));
    CHECK(CheckOrder(graph, layer6, layer7));
    CHECK(CheckOrder(graph, layer7, layer8));

    // Use memory import between backends
    CHECK((layer4->GetType() == LayerType::MemCopy));
    CHECK((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    CHECK((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };
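    // poolingInfo is { 1, 2, 1, 1 }, so each channel of the sub result collapses to one value: its maximum (11 and -1).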

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using CpuAcc
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    CHECK(found != std::string::npos);

    // Correctly switch back to GpuAcc
    found = dump.find("ClPooling2dWorkload_Execute");
    CHECK(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

}