ArmNN 21.02
ClFallbackTests.cpp File Reference
#include <backendsCommon/test/CommonTestUtils.hpp>
#include <test/GraphUtils.hpp>
#include <boost/test/unit_test.hpp>


Functions

 BOOST_AUTO_TEST_CASE (ClImportEnabledFallbackToNeon)
 
 BOOST_AUTO_TEST_CASE (ClImportDisabledFallbackToNeon)
 
 BOOST_AUTO_TEST_CASE (ClImportEnabledFallbackSubgraphToNeon)
 
 BOOST_AUTO_TEST_CASE (ClImportDisableFallbackSubgraphToNeon)
 

Function Documentation
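All four cases build the same three-input network in which an addition feeds a subtraction, list GpuAcc first and CpuAcc second in the backend preferences, and use BackendSelectionHint to force the sub layer onto CpuAcc. They differ in whether OptimizerOptions::m_ImportEnabled is set and in whether the fallback covers a single layer or a subgraph that hands back to GpuAcc through a pooling layer. The shared hinting step, as a minimal sketch (identifiers as in the listings below):

 std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc, armnn::Compute::CpuAcc };
 sub->BackendSelectionHint(backends[1]); // prefer CpuAcc (Neon) for 'sub' only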

◆ BOOST_AUTO_TEST_CASE() [1/4]

BOOST_AUTO_TEST_CASE ( ClImportEnabledFallbackToNeon  )

Definition at line 14 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ImportEnabled, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().

15 {
16  using namespace armnn;
17 
18  IRuntime::CreationOptions options;
19  IRuntimePtr runtime(IRuntime::Create(options));
20 
21  // Builds up the structure of the network.
22  INetworkPtr net(INetwork::Create());
23 
24  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
25  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
26  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
27  IConnectableLayer* add = net->AddAdditionLayer("add");
28  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
29  IConnectableLayer* output = net->AddOutputLayer(0, "output");
30 
31  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
32  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
33  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
34  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
35  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
36 
37  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
38 
39  input0->GetOutputSlot(0).SetTensorInfo(info);
40  input1->GetOutputSlot(0).SetTensorInfo(info);
41  input2->GetOutputSlot(0).SetTensorInfo(info);
42  add->GetOutputSlot(0).SetTensorInfo(info);
43  sub->GetOutputSlot(0).SetTensorInfo(info);
44 
45  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
46  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
47  sub->BackendSelectionHint(backends[1]);
48 
49  // optimize the network
50  OptimizerOptions optOptions;
51  optOptions.m_ImportEnabled = true;
52  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
53 
54  Graph& graph = GetGraphForTesting(optNet.get());
55 
56  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
57  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
58  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
59  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
60  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
61  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
62  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
63 
64  // Checks order is valid.
65  BOOST_TEST(CheckOrder(graph, layer0, layer1));
66  BOOST_TEST(CheckOrder(graph, layer1, layer2));
67  BOOST_TEST(CheckOrder(graph, layer2, layer3));
68  BOOST_TEST(CheckOrder(graph, layer3, layer4));
69  BOOST_TEST(CheckOrder(graph, layer4, layer5));
70  BOOST_TEST(CheckOrder(graph, layer5, layer6));
71 
72  // Use memory import between backends
73  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
74 
75  // Correctly use backend hint
76  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));
77 
78  // Load it into the runtime. It should pass.
79  NetworkId netId;
80  std::string ignoredErrorMessage;
81  INetworkProperties networkProperties(true, true);
82 
83  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
84 
85  // Creates structures for input & output
86  std::vector<float> inputData0
87  {
88  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
89  };
90  std::vector<float> inputData1
91  {
92  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
93  };
94  std::vector<float> inputData2
95  {
96  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
97  };
98 
99  std::vector<float> outputData(12);
100 
101  std::vector<float> expectedOutput
102  {
103  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
104  };
105 
106  InputTensors inputTensors
107  {
108  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
109  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
110  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
111  };
112  OutputTensors outputTensors
113  {
114  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
115  };
116 
117  runtime->GetProfiler(netId)->EnableProfiling(true);
118 
119  // Do the inference
120  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
121 
122  // Retrieve the Profiler.Print() output to get the workload execution
123  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
124  std::stringstream ss;
125  profilerManager.GetProfiler()->Print(ss);
126  std::string dump = ss.str();
127 
128  // Executed Subtraction using CpuAcc
129  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
130  BOOST_TEST(found != std::string::npos);
131 
132  // Contain CopyMemGeneric
133  found = dump.find("CopyMemGeneric");
134  BOOST_TEST(found != std::string::npos);
135 
136  // Check output is as expected
137  BOOST_TEST(outputData == expectedOutput);
138 }
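The listing discards the armnn::Status returned by LoadNetwork. A minimal sketch of the same call with the result checked (variable names as in the listing; in the 21.02 API the two INetworkProperties flags are importEnabled and exportEnabled):

 NetworkId netId;
 std::string errorMessage;
 INetworkProperties networkProperties(true, true); // import and export enabled
 Status status = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
 if (status != Status::Success)
 {
     BOOST_FAIL("LoadNetwork failed: " + errorMessage);
 }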

◆ BOOST_AUTO_TEST_CASE() [2/4]

BOOST_AUTO_TEST_CASE ( ClImportDisabledFallbackToNeon  )

Definition at line 140 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().

141 {
142  using namespace armnn;
143 
144  IRuntime::CreationOptions options;
145  IRuntimePtr runtime(IRuntime::Create(options));
146 
147  // Builds up the structure of the network.
148  INetworkPtr net(INetwork::Create());
149 
150  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
151  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
152  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
153  IConnectableLayer* add = net->AddAdditionLayer("add");
154  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
155  IConnectableLayer* output = net->AddOutputLayer(0, "output");
156 
157  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
158  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
159  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
160  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
161  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
162 
163  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
164 
165  input0->GetOutputSlot(0).SetTensorInfo(info);
166  input1->GetOutputSlot(0).SetTensorInfo(info);
167  input2->GetOutputSlot(0).SetTensorInfo(info);
168  add->GetOutputSlot(0).SetTensorInfo(info);
169  sub->GetOutputSlot(0).SetTensorInfo(info);
170 
171  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
172  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
173  sub->BackendSelectionHint(backends[1]);
174 
175  // optimize the network
176  OptimizerOptions optOptions;
177  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
178 
179  Graph& graph = GetGraphForTesting(optNet.get());
180 
181  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
182  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
183  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
184  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
185  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
186  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
187  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
188 
189  // Checks order is valid.
190  BOOST_TEST(CheckOrder(graph, layer0, layer1));
191  BOOST_TEST(CheckOrder(graph, layer1, layer2));
192  BOOST_TEST(CheckOrder(graph, layer2, layer3));
193  BOOST_TEST(CheckOrder(graph, layer3, layer4));
194  BOOST_TEST(CheckOrder(graph, layer4, layer5));
195  BOOST_TEST(CheckOrder(graph, layer5, layer6));
196 
197  // Use memory import between backends
198  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
199 
200  // Correctly use backend hint
201  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));
202 
203  // Load it into the runtime. It should pass.
204  NetworkId netId;
205  runtime->LoadNetwork(netId, std::move(optNet));
206 
207  // Creates structures for input & output
208  std::vector<float> inputData0
209  {
210  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
211  };
212  std::vector<float> inputData1
213  {
214  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
215  };
216  std::vector<float> inputData2
217  {
218  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
219  };
220 
221  std::vector<float> outputData(12);
222 
223  std::vector<float> expectedOutput
224  {
225  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
226  };
227 
228  InputTensors inputTensors
229  {
230  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
231  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
232  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
233  };
234  OutputTensors outputTensors
235  {
236  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
237  };
238 
239  runtime->GetProfiler(netId)->EnableProfiling(true);
240 
241  // Do the inference
242  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
243 
244  // Retrieve the Profiler.Print() output to get the workload execution
245  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
246  std::stringstream ss;
247  profilerManager.GetProfiler()->Print(ss);
248  std::string dump = ss.str();
249 
250  // Executed Subtraction using CpuAcc
251  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
252  BOOST_TEST(found != std::string::npos);
253 
254  // Contain CopyMemGeneric
255  found = dump.find("CopyMemGeneric");
256  BOOST_TEST(found != std::string::npos);
257 
258  // Check output is as expected
259  BOOST_TEST(outputData == expectedOutput);
260 }
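This variant differs from the import-enabled case only in that OptimizerOptions::m_ImportEnabled keeps its default (false) and the two-argument LoadNetwork overload is used. The assertion only proves that at least one CopyMemGeneric ran; a hedged sketch that counts every occurrence in the dump ('dump' as in the listing):

 std::size_t copies = 0;
 for (std::size_t pos = dump.find("CopyMemGeneric");
      pos != std::string::npos;
      pos = dump.find("CopyMemGeneric", pos + 1))
 {
     ++copies; // one hit per profiled CopyMemGeneric event
 }
 BOOST_TEST(copies >= 1); // the test itself only requires a single match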

◆ BOOST_AUTO_TEST_CASE() [3/4]

BOOST_AUTO_TEST_CASE ( ClImportEnabledFallbackSubgraphToNeon  )

Definition at line 262 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ImportEnabled, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().

263 {
264  using namespace armnn;
265 
266  IRuntime::CreationOptions options;
267  IRuntimePtr runtime(IRuntime::Create(options));
268 
269  // Builds up the structure of the network.
270  INetworkPtr net(INetwork::Create());
271 
272  Pooling2dDescriptor desc;
273 
274  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
275  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
276  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
277  IConnectableLayer* add = net->AddAdditionLayer("add");
278  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
279  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
280  IConnectableLayer* output = net->AddOutputLayer(0, "output");
281 
282  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
283  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
284  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
285  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
286  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
287  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
288 
289  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
290  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
291 
292  input0->GetOutputSlot(0).SetTensorInfo(info);
293  input1->GetOutputSlot(0).SetTensorInfo(info);
294  input2->GetOutputSlot(0).SetTensorInfo(info);
295  add->GetOutputSlot(0).SetTensorInfo(info);
296  sub->GetOutputSlot(0).SetTensorInfo(info);
297  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
298 
299  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
300  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
301  sub->BackendSelectionHint(backends[1]);
302 
303  // optimize the network
304  OptimizerOptions optOptions;
305  optOptions.m_ImportEnabled = true;
306  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
307 
308  Graph& graph = GetGraphForTesting(optNet.get());
309 
310  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
311  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
312  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
313  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
314  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
315  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
316  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
317  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
318  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
319 
320  // Checks order is valid.
321  BOOST_TEST(CheckOrder(graph, layer0, layer1));
322  BOOST_TEST(CheckOrder(graph, layer1, layer2));
323  BOOST_TEST(CheckOrder(graph, layer2, layer3));
324  BOOST_TEST(CheckOrder(graph, layer3, layer4));
325  BOOST_TEST(CheckOrder(graph, layer4, layer5));
326  BOOST_TEST(CheckOrder(graph, layer5, layer6));
327  BOOST_TEST(CheckOrder(graph, layer6, layer7));
328  BOOST_TEST(CheckOrder(graph, layer7, layer8));
329 
330  // Use memory import between backends
331  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
332  BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
333 
334  // Correctly use backend hint
335  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));
336 
337  // Load it into the runtime. It should pass.
338  NetworkId netId;
339  std::string ignoredErrorMessage;
340  INetworkProperties networkProperties(true, true);
341 
342  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
343 
344  // Creates structures for input & output
345  std::vector<float> inputData0
346  {
347  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
348  };
349  std::vector<float> inputData1
350  {
351  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
352  };
353  std::vector<float> inputData2
354  {
355  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
356  };
357 
358  std::vector<float> outputData(2);
359 
360  std::vector<float> expectedOutput{ 11.0f, -1.0f };
361 
362  InputTensors inputTensors
363  {
364  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
365  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
366  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
367  };
368  OutputTensors outputTensors
369  {
370  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
371  };
372 
373  runtime->GetProfiler(netId)->EnableProfiling(true);
374 
375  // Do the inference
376  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
377 
378  // Retrieve the Profiler.Print() output to get the workload execution
379  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
380  std::stringstream ss;
381  profilerManager.GetProfiler()->Print(ss);
382  std::string dump = ss.str();
383 
384  // Executed Subtraction using CpuAcc
385  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
386  BOOST_TEST(found != std::string::npos);
387 
388  // Correctly switch back to GpuAcc
389  found = dump.find("ClPooling2dWorkload_Execute");
390  BOOST_TEST(found != std::string::npos);
391 
392  // Contain CopyMemGeneric
393  found = dump.find("CopyMemGeneric");
394  BOOST_TEST(found != std::string::npos);
395 
396  // Check output is as expected
397  BOOST_TEST(outputData == expectedOutput);
398 }
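The optimizer names the boundary layers it inserts "[ <producer> (slot) -> <consumer> (slot) ]", which is how the listing locates them. A sketch checking both boundaries of this test in one loop (names and variables taken from the listing):

 const std::vector<std::string> boundaryNames =
     { "[ add (0) -> sub (1) ]", "[ sub (0) -> pooling (0) ]" };
 for (const auto& name : boundaryNames)
 {
     Layer* const boundary = GetFirstLayerWithName(graph, name);
     BOOST_TEST((boundary != nullptr));
     BOOST_TEST((boundary->GetType() == LayerType::MemCopy));
 }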

◆ BOOST_AUTO_TEST_CASE() [4/4]

BOOST_AUTO_TEST_CASE ( ClImportDisableFallbackSubgraphToNeon  )

Definition at line 400 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, IRuntime::Create(), INetwork::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), armnn::GetGraphForTesting(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, armnn::MemCopy, armnn::Optimize(), IProfiler::Print(), and IOutputSlot::SetTensorInfo().

401 {
402  using namespace armnn;
403 
404  IRuntime::CreationOptions options;
405  IRuntimePtr runtime(IRuntime::Create(options));
406 
407  // Builds up the structure of the network.
408  INetworkPtr net(INetwork::Create());
409 
410  Pooling2dDescriptor desc;
411 
412  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
413  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
414  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
415  IConnectableLayer* add = net->AddAdditionLayer("add");
416  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
417  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
418  IConnectableLayer* output = net->AddOutputLayer(0, "output");
419 
420  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
421  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
422  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
423  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
424  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
425  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
426 
427  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
428  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
429 
430  input0->GetOutputSlot(0).SetTensorInfo(info);
431  input1->GetOutputSlot(0).SetTensorInfo(info);
432  input2->GetOutputSlot(0).SetTensorInfo(info);
433  add->GetOutputSlot(0).SetTensorInfo(info);
434  sub->GetOutputSlot(0).SetTensorInfo(info);
435  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
436 
437  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
438  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
439  sub->BackendSelectionHint(backends[1]);
440 
441  // optimize the network
442  OptimizerOptions optOptions;
443  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
444 
445  Graph& graph = GetGraphForTesting(optNet.get());
446 
447  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
448  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
449  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
450  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
451  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
452  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
453  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
454  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
455  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
456 
457  // Checks order is valid.
458  BOOST_TEST(CheckOrder(graph, layer0, layer1));
459  BOOST_TEST(CheckOrder(graph, layer1, layer2));
460  BOOST_TEST(CheckOrder(graph, layer2, layer3));
461  BOOST_TEST(CheckOrder(graph, layer3, layer4));
462  BOOST_TEST(CheckOrder(graph, layer4, layer5));
463  BOOST_TEST(CheckOrder(graph, layer5, layer6));
464  BOOST_TEST(CheckOrder(graph, layer6, layer7));
465  BOOST_TEST(CheckOrder(graph, layer7, layer8));
466 
467  // Use memory import between backends
468  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
469  BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
470 
471  // Correctly use backend hint
472  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));
473 
474  // Load it into the runtime. It should pass.
475  NetworkId netId;
476  runtime->LoadNetwork(netId, std::move(optNet));
477 
478  // Creates structures for input & output
479  std::vector<float> inputData0
480  {
481  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
482  };
483  std::vector<float> inputData1
484  {
485  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
486  };
487  std::vector<float> inputData2
488  {
489  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
490  };
491 
492  std::vector<float> outputData(2);
493 
494  std::vector<float> expectedOutput{ 11.0f, -1.0f };
495 
496  InputTensors inputTensors
497  {
498  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
499  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
500  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
501  };
502  OutputTensors outputTensors
503  {
504  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
505  };
506 
507  runtime->GetProfiler(netId)->EnableProfiling(true);
508 
509  // Do the inference
510  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
511 
512  // Retrieve the Profiler.Print() output to get the workload execution
513  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
514  std::stringstream ss;
515  profilerManager.GetProfiler()->Print(ss);
516  std::string dump = ss.str();
517 
518  // Executed Subtraction using CpuAcc
519  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
520  BOOST_TEST(found != std::string::npos);
521 
522  // Correctly switch back to GpuAcc
523  found = dump.find("ClPooling2dWorkload_Execute");
524  BOOST_TEST(found != std::string::npos);
525 
526  // Contain CopyMemGeneric
527  found = dump.find("CopyMemGeneric");
528  BOOST_TEST(found != std::string::npos);
529 
530  // Check output is as expected
531  BOOST_TEST(outputData == expectedOutput);
532 }
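The expected values for both subgraph tests can be verified without ArmNN: add = input0 + input1, sub = input2 - add, and the pooling layer reduces each of the two { 3, 2 } channels to a single value (11 and -1, the per-channel maxima). A self-contained check of that arithmetic, with the data copied from the listings:

 #include <algorithm>
 #include <cassert>
 #include <vector>

 int main()
 {
     std::vector<float> in0 { 1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 6 };
     std::vector<float> in1 { 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6 };
     std::vector<float> in2 { 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };
     std::vector<float> sub(12);
     for (int i = 0; i < 12; ++i)
     {
         sub[i] = in2[i] - (in0[i] + in1[i]); // 'add' feeds 'sub' input 1
     }
     // Shape { 1, 2, 3, 2 }: two channels of six values; take each maximum.
     float ch0 = *std::max_element(sub.begin(), sub.begin() + 6);
     float ch1 = *std::max_element(sub.begin() + 6, sub.end());
     assert(ch0 == 11.0f && ch1 == -1.0f); // matches expectedOutput { 11, -1 }
     return 0;
 }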