ArmNN 20.11
ClFallbackTests.cpp File Reference
#include <backendsCommon/test/CommonTestUtils.hpp>
#include <test/GraphUtils.hpp>
#include <boost/test/unit_test.hpp>

Go to the source code of this file.
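
These tests exercise backend fallback from GpuAcc (OpenCL) to CpuAcc (NEON): a layer or subgraph is pinned to CpuAcc via IConnectableLayer::BackendSelectionHint(), and the optimizer is expected to insert MemCopy layers at the backend boundaries while the numerical results stay unchanged. The four cases cover single-layer and subgraph fallback, each with memory import enabled and disabled.

A minimal, self-contained sketch of the configuration pattern the four tests share, reduced from the listings below (using armnn/ArmNN.hpp as the umbrella include is an assumption of this sketch, not something the file itself shows):

    #include <armnn/ArmNN.hpp>

    int main()
    {
        using namespace armnn;

        IRuntime::CreationOptions options;
        IRuntimePtr runtime(IRuntime::Create(options));

        // Build a small network containing a subtraction layer.
        INetworkPtr net(INetwork::Create());
        IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
        IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
        IConnectableLayer* sub    = net->AddSubtractionLayer("sub");
        IConnectableLayer* output = net->AddOutputLayer(0, "output");

        input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
        input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
        sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

        TensorInfo info({ 1, 2, 3, 2 }, DataType::Float32);
        input0->GetOutputSlot(0).SetTensorInfo(info);
        input1->GetOutputSlot(0).SetTensorInfo(info);
        sub->GetOutputSlot(0).SetTensorInfo(info);

        // Prefer GpuAcc; keep CpuAcc available for fallback, and hint the
        // subtraction layer onto CpuAcc so the optimizer must cross backends.
        std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
        sub->BackendSelectionHint(backends[1]);

        // The "ImportEnabled" variants also turn on memory import;
        // the "Disabled" variants leave OptimizerOptions at its defaults.
        OptimizerOptions optOptions;
        optOptions.m_ImportEnabled = true;
        IOptimizedNetworkPtr optNet =
            Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
        return 0;
    }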

Functions

BOOST_AUTO_TEST_CASE (ClImportEnabledFallbackToNeon)
BOOST_AUTO_TEST_CASE (ClImportDisabledFallbackToNeon)
BOOST_AUTO_TEST_CASE (ClImportEnabledFallbackSubgraphToNeon)
BOOST_AUTO_TEST_CASE (ClImportDisableFallbackSubgraphToNeon)

Function Documentation

◆ BOOST_AUTO_TEST_CASE() [1/4]

BOOST_AUTO_TEST_CASE ( ClImportEnabledFallbackToNeon )

Definition at line 14 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, INetwork::Create(), IRuntime::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ImportEnabled, armnn::MemCopy, armnn::Optimize(), Profiler::Print(), and IOutputSlot::SetTensorInfo().
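
This variant enables memory import both at optimization time (OptimizerOptions::m_ImportEnabled) and at load time; the two boolean arguments to INetworkProperties enable import and export respectively in this release. The assertions check that the subtraction executes as a NEON workload (NeonSubtractionWorkload_Execute), that a MemCopy layer bridges the GpuAcc/CpuAcc boundary, and that the output matches the reference values.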

15 {
16  using namespace armnn;
17 
18  IRuntime::CreationOptions options;
19  IRuntimePtr runtime(IRuntime::Create(options));
20 
21  // Builds up the structure of the network.
22  INetworkPtr net(INetwork::Create());
23 
24  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
25  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
26  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
27  IConnectableLayer* add = net->AddAdditionLayer("add");
28  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
29  IConnectableLayer* output = net->AddOutputLayer(0, "output");
30 
31  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
32  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
33  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
34  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
35  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
36 
37  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
38 
39  input0->GetOutputSlot(0).SetTensorInfo(info);
40  input1->GetOutputSlot(0).SetTensorInfo(info);
41  input2->GetOutputSlot(0).SetTensorInfo(info);
42  add->GetOutputSlot(0).SetTensorInfo(info);
43  sub->GetOutputSlot(0).SetTensorInfo(info);
44 
45  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
46  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
47  sub->BackendSelectionHint(backends[1]);
48 
49  // optimize the network
50  OptimizerOptions optOptions;
51  optOptions.m_ImportEnabled = true;
52  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
53 
54  OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
55  Graph& graph = optNetObjPtr->GetGraph();
56 
57  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
58  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
59  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
60  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
61  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
62  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
63  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
64 
65  // Checks order is valid.
66  BOOST_TEST(CheckOrder(graph, layer0, layer1));
67  BOOST_TEST(CheckOrder(graph, layer1, layer2));
68  BOOST_TEST(CheckOrder(graph, layer2, layer3));
69  BOOST_TEST(CheckOrder(graph, layer3, layer4));
70  BOOST_TEST(CheckOrder(graph, layer4, layer5));
71  BOOST_TEST(CheckOrder(graph, layer5, layer6));
72 
73  // Use memory import between backends
74  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
75 
76  // Correctly use backend hint
77  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
78 
79  // Load it into the runtime. It should pass.
80  NetworkId netId;
81  std::string ignoredErrorMessage;
82  INetworkProperties networkProperties(true, true);
83 
84  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
85 
86  // Creates structures for input & output
87  std::vector<float> inputData0
88  {
89  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
90  };
91  std::vector<float> inputData1
92  {
93  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
94  };
95  std::vector<float> inputData2
96  {
97  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
98  };
99 
100  std::vector<float> outputData(12);
101 
102  std::vector<float> expectedOutput
103  {
104  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
105  };
106 
107  InputTensors inputTensors
108  {
109  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
110  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
111  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
112  };
113  OutputTensors outputTensors
114  {
115  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
116  };
117 
118  runtime->GetProfiler(netId)->EnableProfiling(true);
119 
120  // Do the inference
121  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
122 
123  // Retrieve the Profiler.Print() output to get the workload execution
124  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
125  std::stringstream ss;
126  profilerManager.GetProfiler()->Print(ss);
127  std::string dump = ss.str();
128 
129  // Executed Subtraction using CpuAcc
130  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
131  BOOST_TEST(found != std::string::npos);
132 
133  // Contain CopyMemGeneric
134  found = dump.find("CopyMemGeneric");
135  BOOST_TEST(found != std::string::npos);
136 
137  // Check output is as expected
138  BOOST_TEST(outputData == expectedOutput);
139 }

◆ BOOST_AUTO_TEST_CASE() [2/4]

BOOST_AUTO_TEST_CASE ( ClImportDisabledFallbackToNeon )

Definition at line 141 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, INetwork::Create(), IRuntime::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, armnn::MemCopy, armnn::Optimize(), Profiler::Print(), and IOutputSlot::SetTensorInfo().
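
This variant leaves OptimizerOptions at its defaults (import disabled) and loads the network without INetworkProperties; the same MemCopy layer and CopyMemGeneric workload are still expected at the backend boundary, along with identical outputs.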

142 {
143  using namespace armnn;
144 
145  IRuntime::CreationOptions options;
146  IRuntimePtr runtime(IRuntime::Create(options));
147 
148  // Builds up the structure of the network.
149  INetworkPtr net(INetwork::Create());
150 
151  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
152  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
153  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
154  IConnectableLayer* add = net->AddAdditionLayer("add");
155  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
156  IConnectableLayer* output = net->AddOutputLayer(0, "output");
157 
158  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
159  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
160  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
161  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
162  sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
163 
164  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
165 
166  input0->GetOutputSlot(0).SetTensorInfo(info);
167  input1->GetOutputSlot(0).SetTensorInfo(info);
168  input2->GetOutputSlot(0).SetTensorInfo(info);
169  add->GetOutputSlot(0).SetTensorInfo(info);
170  sub->GetOutputSlot(0).SetTensorInfo(info);
171 
172  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
173  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
174  sub->BackendSelectionHint(backends[1]);
175 
176  // optimize the network
177  OptimizerOptions optOptions;
178  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
179 
180  OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
181  Graph& graph = optNetObjPtr->GetGraph();
182 
183  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
184  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
185  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
186  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
187  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
188  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
189  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
190 
191  // Checks order is valid.
192  BOOST_TEST(CheckOrder(graph, layer0, layer1));
193  BOOST_TEST(CheckOrder(graph, layer1, layer2));
194  BOOST_TEST(CheckOrder(graph, layer2, layer3));
195  BOOST_TEST(CheckOrder(graph, layer3, layer4));
196  BOOST_TEST(CheckOrder(graph, layer4, layer5));
197  BOOST_TEST(CheckOrder(graph, layer5, layer6));
198 
199  // Use memory import between backends
200  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
201 
202  // Correctly use backend hint
203  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
204 
205  // Load it into the runtime. It should pass.
206  NetworkId netId;
207  runtime->LoadNetwork(netId, std::move(optNet));
208 
209  // Creates structures for input & output
210  std::vector<float> inputData0
211  {
212  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
213  };
214  std::vector<float> inputData1
215  {
216  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
217  };
218  std::vector<float> inputData2
219  {
220  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
221  };
222 
223  std::vector<float> outputData(12);
224 
225  std::vector<float> expectedOutput
226  {
227  11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
228  };
229 
230  InputTensors inputTensors
231  {
232  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
233  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
234  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
235  };
236  OutputTensors outputTensors
237  {
238  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
239  };
240 
241  runtime->GetProfiler(netId)->EnableProfiling(true);
242 
243  // Do the inference
244  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
245 
246  // Retrieve the Profiler.Print() output to get the workload execution
247  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
248  std::stringstream ss;
249  profilerManager.GetProfiler()->Print(ss);
250  std::string dump = ss.str();
251 
252  // Executed Subtraction using CpuAcc
253  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
254  BOOST_TEST(found != std::string::npos);
255 
256  // Contain CopyMemGeneric
257  found = dump.find("CopyMemGeneric");
258  BOOST_TEST(found != std::string::npos);
259 
260  // Check output is as expected
261  BOOST_TEST(outputData == expectedOutput);
262 }

◆ BOOST_AUTO_TEST_CASE() [3/4]

BOOST_AUTO_TEST_CASE ( ClImportEnabledFallbackSubgraphToNeon )

Definition at line 264 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, INetwork::Create(), IRuntime::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, OptimizerOptions::m_ImportEnabled, armnn::MemCopy, armnn::Optimize(), Profiler::Print(), and IOutputSlot::SetTensorInfo().
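
The subgraph variant appends a GpuAcc Pooling2d layer after the CpuAcc subtraction, so two MemCopy layers are expected: one entering the fallback subgraph and one returning to GpuAcc, where the pooling executes as ClPooling2dWorkload_Execute.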

265 {
266  using namespace armnn;
267 
268  IRuntime::CreationOptions options;
269  IRuntimePtr runtime(IRuntime::Create(options));
270 
271  // Builds up the structure of the network.
272  INetworkPtr net(INetwork::Create());
273 
274  Pooling2dDescriptor desc;
275 
276  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
277  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
278  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
279  IConnectableLayer* add = net->AddAdditionLayer("add");
280  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
281  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
282  IConnectableLayer* output = net->AddOutputLayer(0, "output");
283 
284  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
285  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
286  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
287  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
288  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
289  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
290 
291  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
292  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
293 
294  input0->GetOutputSlot(0).SetTensorInfo(info);
295  input1->GetOutputSlot(0).SetTensorInfo(info);
296  input2->GetOutputSlot(0).SetTensorInfo(info);
297  add->GetOutputSlot(0).SetTensorInfo(info);
298  sub->GetOutputSlot(0).SetTensorInfo(info);
299  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
300 
301  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
302  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
303  sub->BackendSelectionHint(backends[1]);
304 
305  // optimize the network
306  OptimizerOptions optOptions;
307  optOptions.m_ImportEnabled = true;
308  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
309 
310  OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
311  Graph& graph = optNetObjPtr->GetGraph();
312 
313  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
314  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
315  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
316  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
317  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
318  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
319  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
320  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
321  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
322 
323  // Checks order is valid.
324  BOOST_TEST(CheckOrder(graph, layer0, layer1));
325  BOOST_TEST(CheckOrder(graph, layer1, layer2));
326  BOOST_TEST(CheckOrder(graph, layer2, layer3));
327  BOOST_TEST(CheckOrder(graph, layer3, layer4));
328  BOOST_TEST(CheckOrder(graph, layer4, layer5));
329  BOOST_TEST(CheckOrder(graph, layer5, layer6));
330  BOOST_TEST(CheckOrder(graph, layer6, layer7));
331  BOOST_TEST(CheckOrder(graph, layer7, layer8));
332 
333  // Use memory import between backends
334  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
335  BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
336 
337  // Correctly use backend hint
338  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
339 
340  // Load it into the runtime. It should pass.
341  NetworkId netId;
342  std::string ignoredErrorMessage;
343  INetworkProperties networkProperties(true, true);
344 
345  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
346 
347  // Creates structures for input & output
348  std::vector<float> inputData0
349  {
350  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
351  };
352  std::vector<float> inputData1
353  {
354  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
355  };
356  std::vector<float> inputData2
357  {
358  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
359  };
360 
361  std::vector<float> outputData(2);
362 
363  std::vector<float> expectedOutput{ 11.0f, -1.0f };
364 
365  InputTensors inputTensors
366  {
367  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
368  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
369  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
370  };
371  OutputTensors outputTensors
372  {
373  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
374  };
375 
376  runtime->GetProfiler(netId)->EnableProfiling(true);
377 
378  // Do the inference
379  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
380 
381  // Retrieve the Profiler.Print() output to get the workload execution
382  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
383  std::stringstream ss;
384  profilerManager.GetProfiler()->Print(ss);
385  std::string dump = ss.str();
386 
387  // Executed Subtraction using CpuAcc
388  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
389  BOOST_TEST(found != std::string::npos);
390 
391  // Correctly switch back to GpuAcc
392  found = dump.find("ClPooling2dWorkload_Execute");
393  BOOST_TEST(found != std::string::npos);
394 
395  // Contain CopyMemGeneric
396  found = dump.find("CopyMemGeneric");
397  BOOST_TEST(found != std::string::npos);
398 
399  // Check output is as expected
400  BOOST_TEST(outputData == expectedOutput);
401 }

◆ BOOST_AUTO_TEST_CASE() [4/4]

BOOST_AUTO_TEST_CASE ( ClImportDisableFallbackSubgraphToNeon )

Definition at line 403 of file ClFallbackTests.cpp.

References IConnectableLayer::BackendSelectionHint(), BOOST_AUTO_TEST_SUITE_END(), CheckOrder(), IOutputSlot::Connect(), armnn::CpuAcc, INetwork::Create(), IRuntime::Create(), armnn::Float32, Layer::GetBackendId(), GetFirstLayerWithName(), IConnectableLayer::GetInputSlot(), ProfilerManager::GetInstance(), IConnectableLayer::GetOutputSlot(), ProfilerManager::GetProfiler(), Layer::GetType(), armnn::GpuAcc, armnn::info, armnn::MemCopy, armnn::Optimize(), Profiler::Print(), and IOutputSlot::SetTensorInfo().
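
As in the second test, import is left disabled and the network is loaded without INetworkProperties; the expected graph structure, workloads, and outputs otherwise match the enabled subgraph variant.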

404 {
405  using namespace armnn;
406 
407  IRuntime::CreationOptions options;
408  IRuntimePtr runtime(IRuntime::Create(options));
409 
410  // Builds up the structure of the network.
411  INetworkPtr net(INetwork::Create());
412 
413  Pooling2dDescriptor desc;
414 
415  IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
416  IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
417  IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
418  IConnectableLayer* add = net->AddAdditionLayer("add");
419  IConnectableLayer* sub = net->AddSubtractionLayer("sub");
420  IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
421  IConnectableLayer* output = net->AddOutputLayer(0, "output");
422 
423  input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
424  input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
425  input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
426  add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
427  sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
428  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
429 
430  TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
431  TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
432 
433  input0->GetOutputSlot(0).SetTensorInfo(info);
434  input1->GetOutputSlot(0).SetTensorInfo(info);
435  input2->GetOutputSlot(0).SetTensorInfo(info);
436  add->GetOutputSlot(0).SetTensorInfo(info);
437  sub->GetOutputSlot(0).SetTensorInfo(info);
438  pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
439 
440  std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
441  // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
442  sub->BackendSelectionHint(backends[1]);
443 
444  // optimize the network
445  OptimizerOptions optOptions;
446  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
447 
448  OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
449  Graph& graph = optNetObjPtr->GetGraph();
450 
451  armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
452  armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
453  armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
454  armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
455  armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
456  armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
457  armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
458  armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
459  armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
460 
461  // Checks order is valid.
462  BOOST_TEST(CheckOrder(graph, layer0, layer1));
463  BOOST_TEST(CheckOrder(graph, layer1, layer2));
464  BOOST_TEST(CheckOrder(graph, layer2, layer3));
465  BOOST_TEST(CheckOrder(graph, layer3, layer4));
466  BOOST_TEST(CheckOrder(graph, layer4, layer5));
467  BOOST_TEST(CheckOrder(graph, layer5, layer6));
468  BOOST_TEST(CheckOrder(graph, layer6, layer7));
469  BOOST_TEST(CheckOrder(graph, layer7, layer8));
470 
471  // Use memory import between backends
472  BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
473  BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
474 
475  // Correctly use backend hint
476  BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc ));
477 
478  // Load it into the runtime. It should pass.
479  NetworkId netId;
480  runtime->LoadNetwork(netId, std::move(optNet));
481 
482  // Creates structures for input & output
483  std::vector<float> inputData0
484  {
485  1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
486  };
487  std::vector<float> inputData1
488  {
489  0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
490  };
491  std::vector<float> inputData2
492  {
493  12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
494  };
495 
496  std::vector<float> outputData(2);
497 
498  std::vector<float> expectedOutput{ 11.0f, -1.0f };
499 
500  InputTensors inputTensors
501  {
502  { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
503  { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
504  { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
505  };
506  OutputTensors outputTensors
507  {
508  { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
509  };
510 
511  runtime->GetProfiler(netId)->EnableProfiling(true);
512 
513  // Do the inference
514  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
515 
516  // Retrieve the Profiler.Print() output to get the workload execution
517  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
518  std::stringstream ss;
519  profilerManager.GetProfiler()->Print(ss);
520  std::string dump = ss.str();
521 
522  // Executed Subtraction using CpuAcc
523  std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
524  BOOST_TEST(found != std::string::npos);
525 
526  // Correctly switch back to GpuAcc
527  found = dump.find("ClPooling2dWorkload_Execute");
528  BOOST_TEST(found != std::string::npos);
529 
530  // Contain CopyMemGeneric
531  found = dump.find("CopyMemGeneric");
532  BOOST_TEST(found != std::string::npos);
533 
534  // Check output is as expected
535  BOOST_TEST(outputData == expectedOutput);
536 }