ArmNN 22.05.01
CreateWorkload.hpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "TestUtils.hpp"

#include <Graph.hpp>
#include <Network.hpp>
#include <ResolveType.hpp>

#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/WorkloadData.hpp>

#include <doctest/doctest.h>

#include <utility>
using namespace armnn;

namespace
{

using namespace std;

// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
template<typename Workload>
std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer,
                                               const IWorkloadFactory& factory,
                                               const ModelOptions& modelOptions = {})
{
    std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
    CHECK_MESSAGE(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
                  "Cannot convert to derived class");
    std::string reasonIfUnsupported;
    layer.SetBackendId(factory.GetBackendId());
    CHECK(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported, modelOptions));
    return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
}
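
// A minimal usage sketch (illustrative only): given a layer already added to a graph
// and a backend factory, MakeAndCheckWorkload downcasts the created workload to the
// expected type and verifies the backend reports the layer as supported.
// 'RefActivationWorkload' is an assumption borrowed from the reference backend:
//
//     auto workload = MakeAndCheckWorkload<RefActivationWorkload>(*layer, factory);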

// Helper function to create tensor handles for workloads, assuming they all use the same factory.
void CreateTensorHandles(armnn::Graph& graph,
                         armnn::IWorkloadFactory& factory)
{
    TensorHandleFactoryRegistry tmpRegistry;
    for (auto&& layer : graph.TopologicalSort())
    {
        layer->CreateTensorHandles(tmpRegistry, factory);
    }
}

/////////////////////////////////////////////////////////////////////////////////////////////
// The following functions are called by backendsCommon/test/CreateWorkload*.cpp
// They build very simple graphs, and then create a workload.
// Some checks are performed on the workload to ensure parameters have been passed correctly.
// They return the created workloads so that backend-specific checks can be performed.
/////////////////////////////////////////////////////////////////////////////////////////////
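//
// A sketch of the calling pattern (illustrative only): the TEST_CASE name and the
// RefWorkloadFactory/RefActivationWorkload types below are assumptions taken from the
// reference backend; each backend test substitutes its own factory and workload types.
//
//     TEST_CASE("CreateActivationWorkload")
//     {
//         armnn::Graph graph;
//         armnn::RefWorkloadFactory factory;
//         auto workload = CreateActivationWorkloadTest<RefActivationWorkload,
//                                                      armnn::DataType::Float32>(factory, graph);
//         // Extra, backend-specific checks on 'workload' go here.
//     }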

template <typename ActivationWorkload, armnn::DataType DataType>
std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph)
{
    // Creates the layer we're testing.
    ActivationDescriptor layerDesc;
    layerDesc.m_Function = ActivationFunction::ReLu;
    layerDesc.m_A = 3.5f;
    layerDesc.m_B = -10.0f;

    ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer");

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({1, 1}, DataType);

    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, factory);

    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK(queueDescriptor.m_Parameters.m_A == 3.5f);
    CHECK(queueDescriptor.m_Parameters.m_B == -10.0f);
    CHECK((queueDescriptor.m_Parameters.m_Function == ActivationFunction::ReLu));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseWorkloadTest(armnn::IWorkloadFactory& factory,
                                                            armnn::Graph& graph)
{
    // Creates the layer we're testing.
    Layer* const layer = graph.AddLayer<LayerType>("layer");

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateSubtractionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                    armnn::Graph& graph)
{
    // Creates the layer we're testing.
    SubtractionLayer* const layer = graph.AddLayer<SubtractionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();

    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateMultiplicationWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    // Creates the layer we're testing.
    MultiplicationLayer* const layer = graph.AddLayer<MultiplicationLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    return workload; // Returns so we can do extra, backend-specific tests.
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateAdditionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph)
{
    // Creates the layer we're testing.
    AdditionLayer* const layer = graph.AddLayer<AdditionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->template GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseUnaryWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph,
                                                                 armnn::UnaryOperation op)
{
    ElementwiseUnaryDescriptor desc = ElementwiseUnaryDescriptor(op);
    Layer* const layer = graph.AddLayer<armnn::ElementwiseUnaryLayer>(desc, "layer");

    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    armnn::TensorInfo tensorInfo({ 2, 3 }, DataType);
    Connect(input, layer, tensorInfo, 0, 0);
    Connect(layer, output, tensorInfo, 0, 0);
    CreateTensorHandles(graph, factory);

    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
    DescriptorType queueDescriptor = workload->GetData();

    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph,
                                                                       DataLayout dataLayout = DataLayout::NCHW,
                                                                       const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    // As optimization isn't run, member variables need to be updated.
    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType));
    layer->m_Weight->Allocate();

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(!queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dFusedActivationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory,
    armnn::Graph& graph,
    DataLayout dataLayout = DataLayout::NCHW,
    const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
    // As optimization isn't run, member variables need to be updated.
    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType));
    layer->m_Bias = std::make_unique<ScopedTensorHandle>(TensorInfo({2}, GetBiasDataType(DataType)));

    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo({2}, DataType, inputsQScale);
    biasTensorInfo.SetConstant();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();
    bias->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
    bias->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK(queueDescriptor.m_Inputs.size() == 3);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadFastMathTest(armnn::IWorkloadFactory& factory,
                                                                               armnn::Graph& graph,
                                                                               DataLayout dataLayout = DataLayout::NCHW,
                                                                               const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 0;
    layerDesc.m_PadRight = 0;
    layerDesc.m_PadTop = 0;
    layerDesc.m_PadBottom = 0;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = TensorShape{ 32, 32, 3, 3 };
    TensorShape biasShape   = TensorShape{ 32 };
    TensorShape inputShape  = TensorShape{ 1, 32, 149, 149 };
    TensorShape outputShape = TensorShape{ 1, 32, 147, 147 };
    // As optimization isn't run, member variables need to be updated.
    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType));
    layer->m_Bias = std::make_unique<ScopedTensorHandle>(TensorInfo(biasShape, GetBiasDataType(DataType)));

    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo(biasShape, DataType, inputsQScale);
    biasTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 0);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename LstmWorkload>
std::unique_ptr<LstmWorkload> CreateLstmWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
    // This parameter setting is for withCifgWithPeepholeNoProjection.
    LstmDescriptor layerDesc;
    layerDesc.m_ActivationFunc = 4;
    layerDesc.m_ClippingThresCell = 0.0f;
    layerDesc.m_ClippingThresProj = 0.0f;
    layerDesc.m_CifgEnabled = true;
    layerDesc.m_PeepholeEnabled = true;
    layerDesc.m_ProjectionEnabled = false;

    LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer");
    unsigned int batchSize = 2;
    unsigned int inputSize = 2;
    unsigned int numUnits = 4;
    unsigned int outputSize = 4;

    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();
    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();


    if (layerDesc.m_PeepholeEnabled)
    {
        layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate();
        layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate();
    }

    // Create input and output layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");
    Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(3, "output");

    // Connect up.
    armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits * (layerDesc.m_CifgEnabled ? 3 : 4) },
                                                DataType::Float32);
    Connect(input, layer, lstmTensorInfo1, 0, 0);
    Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
    Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
    Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
    Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
    Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
    Connect(layer, output, lstmTensorInfo3, 3, 0);

    CreateTensorHandles(graph, factory);

    // Make the workload and check it.
    auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, factory);
    LstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_ActivationFunc == 4);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresCell == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresProj == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 4);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == TensorInfo({ numUnits, inputSize },
                                                                                 DataType::Float32)));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == TensorInfo({ numUnits },
                                                                           DataType::Float32)));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == TensorInfo({ numUnits }, DataType::Float32)));
    return workload;
}

template <typename QuantizedLstmWorkload>
std::unique_ptr<QuantizedLstmWorkload> CreateQuantizedLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    auto layer = graph.AddLayer<QuantizedLstmLayer>("quantizedLstmlayer");
    unsigned int numBatches = 2;
    unsigned int inputSize = 2;
    unsigned int outputSize = 4;

    // Scale/Offset for input/output, cellState In/Out, weights, bias
    float inputOutputScale = 0.0078125f;
    int32_t inputOutputOffset = 128;

    float cellStateScale = 0.00048828125f;
    int32_t cellStateOffset = 0;

    float weightsScale = 0.00408021f;
    int32_t weightsOffset = 100;

    float biasScale = 3.1876640625e-05f;
    int32_t biasOffset = 0;

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QAsymmU8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QAsymmU8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize},
                               armnn::DataType::Signed32,
                               biasScale,
                               biasOffset);

    // Weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    // Allocate weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_InputGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_ForgetGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_CellBias->Allocate();
    layer->m_QuantizedLstmParameters.m_OutputGateBias->Allocate();

    // Create input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(1, "cellStateIn");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(2, "outputStateIn");

    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(0, "cellStateOut");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");

    // Input/output tensor info and quantization info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmU8,
                                inputOutputScale,
                                inputOutputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, outputSize},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmU8,
                                      inputOutputScale,
                                      inputOutputOffset);

    // Connect input/output slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(cellStateIn, layer, cellStateInfo, 0, 1);
    Connect(outputStateIn, layer, outputStateInfo, 0, 2);

    Connect(layer, cellStateOut, cellStateInfo, 0, 0);
    Connect(layer, outputStateOut, outputStateInfo, 1, 0);

    CreateTensorHandles(graph, factory);

    // Create workload and check layer support
    auto workload = MakeAndCheckWorkload<QuantizedLstmWorkload>(*layer, factory);
    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    // Validate input/output sizes
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 2);

    // Validate weight tensor info
    CHECK((queueDescriptor.m_InputToInputWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToInputWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_InputGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}

template <typename QLstmWorkload>
std::unique_ptr<QLstmWorkload> CreateQLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                       armnn::Graph& graph)
{
    QLstmDescriptor layerDesc;
    layerDesc.m_CifgEnabled = true;
    layerDesc.m_PeepholeEnabled = false;
    layerDesc.m_ProjectionEnabled = false;
    layerDesc.m_LayerNormEnabled = true;

    layerDesc.m_CellClip = 0.0f;
    layerDesc.m_ProjectionClip = 0.0f;

    layerDesc.m_HiddenStateZeroPoint = 0;
    layerDesc.m_HiddenStateScale = 0.007f;

    layerDesc.m_InputIntermediateScale = 0.007059f;
    layerDesc.m_ForgetIntermediateScale = 0.007812f;
    layerDesc.m_CellIntermediateScale = 0.007059f;
    layerDesc.m_OutputIntermediateScale = 0.007812f;

    QLstmLayer* const layer = graph.AddLayer<QLstmLayer>(layerDesc, "qLstm");

    unsigned int numBatches = 2;
    unsigned int inputSize = 4;
    unsigned int numUnits = 4;
    unsigned int outputSize = 4;

    // Scale/Offset quantization info
    float inputScale = 0.0078125f;
    int32_t inputOffset = 0;

    // if (!projectionEnabled) outputScale == hiddenStateScale
    float outputScale = layerDesc.m_HiddenStateScale;
    int32_t outputOffset = layerDesc.m_HiddenStateZeroPoint;

    float cellStateScale = 3.05176e-05f;
    int32_t cellStateOffset = 0;

    float weightsScale = 0.00784314f;
    int32_t weightsOffset = 0;

    float layerNormScale = 3.05182e-05f;
    int32_t layerNormOffset = 0;

    float biasScale = layerNormScale / 1024;
    int32_t biasOffset = 0;

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QSymmS8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QSymmS8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);

    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);

    // Create and allocate tensors
    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_BasicParameters.m_RecurrentToForgetWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToCellWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();

    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_CellLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_OutputLayerNormWeights->Allocate();

    // Input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");

    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(0, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(1, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(2, "output");

    // Input/Output tensor info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmS8,
                                inputScale,
                                inputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, numUnits},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmS8,
                                      outputScale,
                                      outputOffset);

    // Connect layers to slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(outputStateIn, layer, outputStateInfo, 0, 1);
    Connect(cellStateIn, layer, cellStateInfo, 0, 2);

    Connect(layer, outputStateOut, outputStateInfo, 0, 0);
    Connect(layer, cellStateOut, cellStateInfo, 1, 0);
    Connect(layer, output, outputStateInfo, 2, 0);

    CreateTensorHandles(graph, factory);

    // Create and check workload
    auto workload = MakeAndCheckWorkload<QLstmWorkload>(*layer, factory);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_CellClip == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ProjectionClip == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 3);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}

template<typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                             armnn::Graph& graph)
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 1;
    layerDesc.m_PadRight = 1;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = true;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    TensorShape biasShape = TensorShape{ 2 };
    TensorShape weightShape = TensorShape{ 2, 3, 3, 3 };
    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo(biasShape, GetBiasDataType(DataType), inputsQScale);
    biasTensorInfo.SetConstant();

    layer->m_Weight = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    layer->m_Bias = std::make_unique<ScopedTensorHandle>(biasTensorInfo);

    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();
    bias->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
    bias->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo({2, 3, 6, 6}, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo({2, 2, 6, 6}, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);

    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Weight->GetTensorInfo() == weightsTensorInfo));
    CHECK((queueDescriptor.m_Bias->GetTensorInfo() == biasTensorInfo));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename DepthwiseConvolution2dFloat32Workload, armnn::DataType DataType>
std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    // Creates the layer we're testing.
    DepthwiseConvolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 1;
    layerDesc.m_PadRight = 2;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 2;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    TensorShape weightShape({1, 4, 4, 2});
    TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
                             TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                              TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };

    DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");

    // As optimization isn't run, member variables need to be updated.
    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType)); // [ 1, H, W, I*M ]
    layer->m_Weight->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, TensorInfo(weightShape, DataType, inputsQScale, 0.0f, true), 0, 1);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, factory);

    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 2);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == false);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename FullyConnectedWorkload, armnn::DataType DataType>
std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                         armnn::Graph& graph)
{
    // Creates the layer we're testing.
    FullyConnectedDescriptor layerDesc;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_TransposeWeightMatrix = true;

    FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    // As optimization isn't run, member variables need to be updated.
    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0));
    layer->m_Weight->Allocate();

    armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);

    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}
1288 
1289 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1290 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
1291  (armnn::IWorkloadFactory& factory,
1292  armnn::Graph& graph)
1293 {
1294  // Creates the layer we're testing.
1295  FullyConnectedDescriptor layerDesc;
1296  layerDesc.m_BiasEnabled = true;
1297  layerDesc.m_TransposeWeightMatrix = true;
1298 
1299  FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1300 
1301  float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0f;
1302  float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0f;
1303 
1304  // As optimization isn't run, member variables need to be updated.
1305  layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0));
1306  layer->m_Bias = std::make_unique<ScopedTensorHandle>(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale));
1307  layer->m_Weight->Allocate();
1308  layer->m_Bias->Allocate();
1309 
1310  armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
1311  armnn::TensorInfo biasesTensorInfo({7}, GetBiasDataType(DataType), inputsQScale);
1312  weightsTensorInfo.SetConstant();
1313  biasesTensorInfo.SetConstant();
1314 
1315  auto activationDesc = std::make_shared<ActivationDescriptor>();
1316  activationDesc->m_A = 10.0f;
1317  activationDesc->m_B = 5.0f;
1318  activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;
1319 
1320  layer->SetAdditionalInfoForObject(activationDesc);
1321 
1322  // Check that the additional information can be queried from the layer
1323  std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
1324  ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
1325  ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
1326  ARMNN_ASSERT(static_cast<ActivationFunction>(activationDescPtr->m_Function) ==
1327  armnn::ActivationFunction::BoundedReLu);
1328 
1329  // Creates extra layers.
1330  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1331  auto const weights = graph.AddLayer<ConstantLayer>("weights");
1332  auto const biases = graph.AddLayer<ConstantLayer>("biases");
1333  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1334 
1335  weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1336  weights->m_LayerOutput->Allocate();
1337  biases->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasesTensorInfo);
1338  biases->m_LayerOutput->Allocate();
1339 
1340  // Connects up.
1341  Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1342  Connect(weights, layer, weightsTensorInfo, 0, 1);
1343  Connect(biases, layer, biasesTensorInfo, 0, 2);
1344  Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1345  CreateTensorHandles(graph, factory);
1346 
1347  // Makes the workload and checks it.
1348  auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1349 
1350  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1351 
1352  const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
1353  IgnoreUnused(queueDescBlobPtr);
1354 
1355  ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
1356  ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
1357  ARMNN_ASSERT(
1358  static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
1359  );
1360 
1361  CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1362  CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1363  CHECK(queueDescriptor.m_Inputs.size() == 3);
1364  CHECK(queueDescriptor.m_Outputs.size() == 1);
1365 
1366  // Returns so we can do extra, backend-specific tests.
1367  return workload;
1368 }
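// The "blob" is the additional-info object attached to the layer; backends that fuse an
// activation into the fully connected kernel read it back from the queue descriptor. The
// round trip this test exercises, in isolation (names as used above):
//
//     auto desc = std::make_shared<ActivationDescriptor>();
//     desc->m_Function = ActivationFunction::BoundedReLu;
//     layer->SetAdditionalInfoForObject(desc);                               // producer side
//     const ActivationDescriptor* blob =
//         queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();  // consumer side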
1369 
1370 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1371 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest
1372  (armnn::IWorkloadFactory& factory,
1373  armnn::Graph& graph)
1374 {
1375  // Creates the layer we're testing.
1376  FullyConnectedDescriptor layerDesc;
1377  layerDesc.m_BiasEnabled = true;
1378  layerDesc.m_TransposeWeightMatrix = true;
1379  layerDesc.m_ConstantWeights = false;
1380 
1381  FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1382 
1383  float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0f;
1384  float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0f;
1385 
1386  // Creates extra layers with weights and biases as input layers.
1387  Layer* const input = graph.AddLayer<InputLayer>(1, "input");
1388  Layer* const weights = graph.AddLayer<InputLayer>(2, "weights");
1389  Layer* const biases = graph.AddLayer<InputLayer>(3, "biases");
1390  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1391 
1392  // Connects up.
1393  Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1394  Connect(weights, layer, TensorInfo({7, 20}, DataType, inputsQScale), 0, 1);
1395  Connect(biases, layer, TensorInfo({7}, GetBiasDataType(DataType), inputsQScale), 0, 2);
1396  Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1397  CreateTensorHandles(graph, factory);
1398 
1399  // Makes the workload and checks it.
1400  auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1401 
1402  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1403 
1404  CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1405  CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1406  CHECK(queueDescriptor.m_Parameters.m_ConstantWeights == false);
1407  CHECK(queueDescriptor.m_Inputs.size() == 3);
1408  CHECK(queueDescriptor.m_Outputs.size() == 1);
1409 
1410  // Returns so we can do extra, backend-specific tests.
1411  return workload;
1412 }
1413 
1414 
1415 template <typename NormalizationWorkload, armnn::DataType DataType>
1416 std::unique_ptr<NormalizationWorkload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1417  armnn::Graph& graph,
1418  DataLayout dataLayout = DataLayout::NCHW)
1419 {
1420  // Creates the layer we're testing.
1421  NormalizationDescriptor layerDesc;
1422  layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1423  layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1424  layerDesc.m_NormSize = 3;
1425  layerDesc.m_Alpha = 0.5f;
1426  layerDesc.m_Beta = -1.0f;
1427  layerDesc.m_K = 0.2f;
1428  layerDesc.m_DataLayout = dataLayout;
1429 
1430  NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
1431 
1432  // Creates extra layers.
1433  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1434  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1435 
1436  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1437  TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1438  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1439  TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1440 
1441  // Connects up.
1442  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1443  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1444  Connect(input, layer, inputTensorInfo);
1445  Connect(layer, output, outputTensorInfo);
1446  CreateTensorHandles(graph, factory);
1447 
1448  // Makes the workload and checks it.
1449  auto workload = MakeAndCheckWorkload<NormalizationWorkload>(*layer, factory);
1450 
1451  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1452  CHECK((queueDescriptor.m_Parameters.m_NormChannelType == NormalizationAlgorithmChannel::Across));
1453  CHECK((queueDescriptor.m_Parameters.m_NormMethodType == NormalizationAlgorithmMethod::LocalBrightness));
1454  CHECK(queueDescriptor.m_Parameters.m_NormSize == 3);
1455  CHECK(queueDescriptor.m_Parameters.m_Alpha == 0.5f);
1456  CHECK(queueDescriptor.m_Parameters.m_Beta == -1.0f);
1457  CHECK(queueDescriptor.m_Parameters.m_K == 0.2f);
1458  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1459 
1460  CHECK(queueDescriptor.m_Inputs.size() == 1);
1461  CHECK(queueDescriptor.m_Outputs.size() == 1);
1462 
1463  // Returns so we can do extra, backend-specific tests.
1464  return workload;
1465 }
1466 
1467 template <typename Pooling2dWorkload, armnn::DataType DataType>
1468 std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory,
1469  armnn::Graph& graph,
1470  DataLayout dataLayout = DataLayout::NCHW)
1471 {
1472  // Creates the layer we're testing.
1473  Pooling2dDescriptor layerDesc;
1474  layerDesc.m_PoolType = PoolingAlgorithm::Average;
1475  layerDesc.m_PoolWidth = 3;
1476  layerDesc.m_PoolHeight = 3;
1477  layerDesc.m_PadLeft = 2;
1478  layerDesc.m_PadRight = 2;
1479  layerDesc.m_PadTop = 1;
1480  layerDesc.m_PadBottom = 1;
1481  layerDesc.m_StrideX = 2;
1482  layerDesc.m_StrideY = 3;
1483  layerDesc.m_OutputShapeRounding = OutputShapeRounding::Floor;
1484  layerDesc.m_DataLayout = dataLayout;
1485 
1486  Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer");
1487 
1488  // Create extra layers
1489  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1490  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1491 
1492  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
1493  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
1494 
1495  // Connect up
1496  Connect(input, layer, TensorInfo(inputShape, DataType));
1497  Connect(layer, output, TensorInfo(outputShape, DataType));
1498  CreateTensorHandles(graph, factory);
1499 
1500  // Make the workload and check it
1501  auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, factory);
1502 
1503  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
1504  CHECK((queueDescriptor.m_Parameters.m_PoolType == PoolingAlgorithm::Average));
1505  CHECK((queueDescriptor.m_Parameters.m_OutputShapeRounding == OutputShapeRounding::Floor));
1506  CHECK(queueDescriptor.m_Parameters.m_PoolWidth == 3);
1507  CHECK(queueDescriptor.m_Parameters.m_PoolHeight == 3);
1508  CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
1509  CHECK(queueDescriptor.m_Parameters.m_StrideY == 3);
1510  CHECK(queueDescriptor.m_Parameters.m_PadLeft == 2);
1511  CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
1512  CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
1513  CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
1514  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1515 
1516  CHECK(queueDescriptor.m_Inputs.size() == 1);
1517  CHECK(queueDescriptor.m_Outputs.size() == 1);
1518 
1519  // Return so we can do extra, backend-specific tests
1520  return workload;
1521 }
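// A worked check of the output shape, assuming the usual floor-rounded pooling formula
// out = floor((in + padBefore + padAfter - pool) / stride) + 1:
//
//     width:  floor((5 + 2 + 2 - 3) / 2) + 1 = 4
//     height: floor((5 + 1 + 1 - 3) / 3) + 1 = 2
//
// which matches the NCHW output shape {3, 2, 2, 4} (and {3, 2, 4, 2} for NHWC).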
1522 
1523 template <typename SoftmaxWorkload, armnn::DataType DataType>
1524 std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1525  armnn::Graph& graph)
1526 {
1527  // Create the layer we're testing.
1528  SoftmaxDescriptor softmaxDescriptor;
1529  // Set Axis to -1 if CL or Neon until further Axes are supported.
1530  if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
1531  {
1532  softmaxDescriptor.m_Axis = -1;
1533  }
1534 
1535  Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer");
1536  // Create extra layers.
1537  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1538  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1539 
1540  // Connect up
1541  armnn::TensorInfo tensorInfo({4, 1}, DataType);
1542  if (DataType == armnn::DataType::QAsymmU8)
1543  {
1544  tensorInfo.SetQuantizationOffset(0);
1545  tensorInfo.SetQuantizationScale(1.f / 256);
1546  }
1547  else if (DataType == armnn::DataType::QAsymmS8)
1548  {
1549  tensorInfo.SetQuantizationOffset(-128);
1550  tensorInfo.SetQuantizationScale(1.f / 256);
1551  }
1552 
1553  Connect(input, layer, tensorInfo);
1554  Connect(layer, output, tensorInfo);
1555  CreateTensorHandles(graph, factory);
1556 
1557  // Make the workload and check it.
1558  auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, factory);
1559 
1560  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1561  CHECK(queueDescriptor.m_Inputs.size() == 1);
1562  CHECK(queueDescriptor.m_Outputs.size() == 1);
1563 
1564  // Return so we can do extra, backend-specific tests.
1565  return workload;
1566 }
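// The quantization parameters follow from softmax producing values in [0, 1]: a scale of
// 1/256 maps that range onto all 256 integer levels, with the zero point placed at the
// bottom of the integer range in each case:
//
//     QAsymmU8: real = (q - 0)      * (1/256), q in [0, 255]
//     QAsymmS8: real = (q - (-128)) * (1/256), q in [-128, 127]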
1567 
1568 template<typename SplitterWorkload, armnn::DataType DataType>
1569 std::unique_ptr<SplitterWorkload>
1570  CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1571 {
1572  // Create the layer we're testing.
1573  // NOTE: we need three dimensions (channels, height/y, width/x) because the Compute
1574  // Library restricts subtensors to have the same x and y dimensions as
1575  // their parent tensors, so the origin on the x and y dimensions has to be
1576  // zero for any view. We therefore split along a dimension other than x and y.
1577  // NOTE: arguments are: number of views, number of dimensions.
1578  ViewsDescriptor layerDesc(3, 3);
1579  // NOTE: arguments are: view, dimension, value.
1580  layerDesc.SetViewOriginCoord(0, 0, 0);
1581  layerDesc.SetViewOriginCoord(1, 0, 1);
1582  layerDesc.SetViewOriginCoord(2, 0, 3);
1583 
1584  Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer");
1585 
1586  // Adds extra layers.
1587  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1588  Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0");
1589  Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1590  Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1591 
1592  // Connects up.
1593  armnn::TensorInfo tensorInfo({5, 7, 7}, DataType);
1594  Connect(input, layer, tensorInfo);
1595 
1596  armnn::TensorInfo output0Info({1, 7, 7}, DataType);
1597  armnn::TensorInfo output1Info({2, 7, 7}, DataType);
1598  armnn::TensorInfo output2Info({2, 7, 7}, DataType);
1599 
1600  Connect(layer, output0, output0Info, 0, 0);
1601  Connect(layer, output1, output1Info, 1, 0);
1602  Connect(layer, output2, output2Info, 2, 0);
1603 
1604  CreateTensorHandles(graph, factory);
1605 
1606  // Makes the workload and checks it.
1607  auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, factory);
1608 
1609  SplitterQueueDescriptor queueDescriptor = workload->GetData();
1610  CHECK(queueDescriptor.m_Inputs.size() == 1);
1611  CHECK(queueDescriptor.m_Outputs.size() == 3);
1612  CHECK(queueDescriptor.m_ViewOrigins.size() == 3);
1613 
1614  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[0] == 0);
1615  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[0] == 1);
1616  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[0] == 3);
1617  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[1] == 0);
1618  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[1] == 0);
1619  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[1] == 0);
1620  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[2] == 0);
1621  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[2] == 0);
1622  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[2] == 0);
1623 
1624  // Returns so we can do extra, backend-specific tests.
1625  return workload;
1626 }
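// A worked view of the split: the {5, 7, 7} input is divided along dimension 0 at origins
// 0, 1 and 3, so the three views cover channels [0, 1), [1, 3) and [3, 5), matching the
// {1, 7, 7} / {2, 7, 7} / {2, 7, 7} output shapes (1 + 2 + 2 = 5).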
1627 
1628 /// This function constructs a graph with both a splitter and a concat, and returns a pair of the workloads.
1629 template<typename SplitterWorkload, typename ConcatWorkload, armnn::DataType DataType>
1630 std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<ConcatWorkload>>
1631  CreateSplitterConcatWorkloadTest(armnn::IWorkloadFactory &factory, armnn::Graph &graph)
1632 {
1633  armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, DataType);
1634 
1635  armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, DataType);
1636  armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, DataType);
1637 
1638  // Constructs the graph.
1639  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1640 
1641  armnn::ViewsDescriptor splitterViews(2);
1642  splitterViews.SetViewOriginCoord(0, 0, 0);
1643  splitterViews.SetViewOriginCoord(0, 1, 0);
1644  splitterViews.SetViewOriginCoord(0, 2, 0);
1645  splitterViews.SetViewOriginCoord(0, 3, 0);
1646 
1647  splitterViews.SetViewOriginCoord(1, 0, 0);
1648  splitterViews.SetViewOriginCoord(1, 1, 1);
1649  splitterViews.SetViewOriginCoord(1, 2, 0);
1650  splitterViews.SetViewOriginCoord(1, 3, 0);
1651 
1652  // create splitter layer
1653  Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1654  CHECK(splitter);
1655 
1656  armnn::OriginsDescriptor concatViews(2);
1657  concatViews.SetViewOriginCoord(0, 0, 0);
1658  concatViews.SetViewOriginCoord(0, 1, 1);
1659  concatViews.SetViewOriginCoord(0, 2, 0);
1660  concatViews.SetViewOriginCoord(0, 3, 0);
1661 
1662  concatViews.SetViewOriginCoord(1, 0, 0);
1663  concatViews.SetViewOriginCoord(1, 1, 0);
1664  concatViews.SetViewOriginCoord(1, 2, 0);
1665  concatViews.SetViewOriginCoord(1, 3, 0);
1666 
1667  // create concat layer
1668  Layer* const concat = graph.AddLayer<ConcatLayer>(concatViews, "concat");
1669  CHECK(concat);
1670 
1671  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1672 
1673  // Adds connections.
1674  // connect input to splitter
1675  Connect(input, splitter, inputTensorInfo, 0, 0);
1676  // connect splitter[0] to concat[1]
1677  Connect(splitter, concat, splitTensorInfo1, 0, 1); // The splitter & concat are connected up.
1678  // connect splitter[1] to concat[0]
1679  Connect(splitter, concat, splitTensorInfo2, 1, 0); // So that the outputs are flipped round.
1680  // connect concat to output
1681  Connect(concat, output, inputTensorInfo, 0, 0);
1682 
1683  // create tensor handles
1684  CreateTensorHandles(graph, factory);
1685 
1686  // create splitter workload
1687  auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1688  CHECK(workloadSplitter);
1689  // create concat workload
1690  auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
1691  CHECK(workloadConcat);
1692 
1693  return {std::move(workloadSplitter), std::move(workloadConcat)};
1694 }
1695 
1696 
1697 /// This function constructs a graph with a splitter that has two outputs. Each of those outputs
1698 /// is then connected to two different activation layers.
1699 template<typename SplitterWorkload, typename ActivationWorkload, armnn::DataType DataType>
1700 void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph,
1701  std::unique_ptr<SplitterWorkload>& wlSplitter,
1702  std::unique_ptr<ActivationWorkload>& wlActiv0_0,
1703  std::unique_ptr<ActivationWorkload>& wlActiv0_1,
1704  std::unique_ptr<ActivationWorkload>& wlActiv1_0,
1705  std::unique_ptr<ActivationWorkload>& wlActiv1_1)
1706 {
1707  armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, DataType);
1708  armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, DataType);
1709  armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, DataType);
1710 
1711  // Constructs the graph.
1712  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1713 
1714  armnn::ViewsDescriptor splitterViews(2);
1715 
1716  splitterViews.SetViewOriginCoord(0, 0, 0);
1717  splitterViews.SetViewOriginCoord(0, 1, 0);
1718  splitterViews.SetViewOriginCoord(0, 2, 0);
1719  splitterViews.SetViewOriginCoord(0, 3, 0);
1720 
1721  splitterViews.SetViewOriginCoord(1, 0, 0);
1722  splitterViews.SetViewOriginCoord(1, 1, 1);
1723  splitterViews.SetViewOriginCoord(1, 2, 0);
1724  splitterViews.SetViewOriginCoord(1, 3, 0);
1725 
1726  Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1727 
1728  armnn::ActivationDescriptor activationDesc;
1729 
1730  Layer* const activ0_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_0");
1731  Layer* const activ0_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_1");
1732  Layer* const activ1_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_0");
1733  Layer* const activ1_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_1");
1734 
1735  Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1736  Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1737  Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3");
1738  Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4");
1739 
1740  // Adds connections.
1741  Connect(input, splitter, inputTensorInfo, 0, 0);
1742  Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
1743  Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
1744 
1745  Connect(splitter, activ1_0, splitTensorInfo2, 1, 0);
1746  Connect(splitter, activ1_1, splitTensorInfo2, 1, 0);
1747 
1748  Connect(activ0_0, output1, splitTensorInfo1, 0, 0);
1749  Connect(activ0_1, output2, splitTensorInfo1, 0, 0);
1750  Connect(activ1_0, output3, splitTensorInfo2, 0, 0);
1751  Connect(activ1_1, output4, splitTensorInfo2, 0, 0);
1752 
1753  CreateTensorHandles(graph, factory);
1754 
1755  auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1756  auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, factory);
1757  auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, factory);
1758  auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, factory);
1759  auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, factory);
1760 
1761  wlSplitter = std::move(workloadSplitter);
1762  wlActiv0_0 = std::move(workloadActiv0_0);
1763  wlActiv0_1 = std::move(workloadActiv0_1);
1764  wlActiv1_0 = std::move(workloadActiv1_0);
1765  wlActiv1_1 = std::move(workloadActiv1_1);
1766 }
1767 
1768 template <typename ResizeWorkload, armnn::DataType DataType>
1769 std::unique_ptr<ResizeWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory,
1770  armnn::Graph& graph,
1771  DataLayout dataLayout = DataLayout::NCHW)
1772 {
1773  TensorShape inputShape;
1774  TensorShape outputShape;
1775 
1776  switch (dataLayout) {
1777  case DataLayout::NHWC:
1778  inputShape = { 2, 4, 4, 3 };
1779  outputShape = { 2, 2, 2, 3 };
1780  break;
1781  case DataLayout::NCHW:
1782  default:
1783  inputShape = { 2, 3, 4, 4 };
1784  outputShape = { 2, 3, 2, 2 };
1785  }
1786 
1787  // Creates the layer we're testing.
1788  ResizeDescriptor resizeDesc;
1789  armnnUtils::DataLayoutIndexed dimensionIndices = dataLayout;
1790  resizeDesc.m_Method = ResizeMethod::Bilinear;
1791  resizeDesc.m_TargetWidth = outputShape[dimensionIndices.GetWidthIndex()];
1792  resizeDesc.m_TargetHeight = outputShape[dimensionIndices.GetHeightIndex()];
1793  resizeDesc.m_DataLayout = dataLayout;
1794  Layer* const layer = graph.AddLayer<ResizeLayer>(resizeDesc, "resize");
1795 
1796  // Creates extra layers.
1797  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1798  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1799 
1800  // Connects up.
1801  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1802  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1803  Connect(input, layer, inputTensorInfo);
1804  Connect(layer, output, outputTensorInfo);
1805  CreateTensorHandles(graph, factory);
1806 
1807  // Makes the workload and checks it.
1808  auto workload = MakeAndCheckWorkload<ResizeWorkload>(*layer, factory);
1809 
1810  auto queueDescriptor = workload->GetData();
1811  CHECK(queueDescriptor.m_Inputs.size() == 1);
1812  CHECK(queueDescriptor.m_Outputs.size() == 1);
1813  CHECK(queueDescriptor.m_Parameters.m_DataLayout == dataLayout);
1814 
1815  // Returns so we can do extra, backend-specific tests.
1816  return workload;
1817 }
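// DataLayoutIndexed supplies the height/width positions for a layout, which is why the
// target size can be read straight off the output shape. For example:
//
//     armnnUtils::DataLayoutIndexed nhwc(DataLayout::NHWC);
//     nhwc.GetHeightIndex();  // 1 -> m_TargetHeight = outputShape[1] = 2 here
//     nhwc.GetWidthIndex();   // 2 -> m_TargetWidth  = outputShape[2] = 2 here
//
// For NCHW the height and width indices are 2 and 3 respectively.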
1818 
1819 template <typename BatchToSpaceNdWorkload, armnn::DataType DataType>
1820 std::unique_ptr<BatchToSpaceNdWorkload> CreateBatchToSpaceNdWorkloadTest(armnn::IWorkloadFactory& factory,
1821  armnn::Graph& graph)
1822 {
1823  BatchToSpaceNdDescriptor desc;
1824  Layer* const layer = graph.AddLayer<BatchToSpaceNdLayer>(desc, "batchToSpace");
1825 
1826  // Creates extra layers.
1827  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1828  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1829 
1830  // Connects up.
1831  armnn::TensorInfo tensorInfo({1, 1, 1, 1}, DataType);
1832 
1833  Connect(input, layer, tensorInfo);
1834  Connect(layer, output, tensorInfo);
1835 
1836  CreateTensorHandles(graph, factory);
1837 
1838  // Makes the workload and checks it.
1839  auto workload = MakeAndCheckWorkload<BatchToSpaceNdWorkload>(*layer, factory);
1840 
1841  BatchToSpaceNdQueueDescriptor queueDescriptor = workload->GetData();
1842  CHECK(queueDescriptor.m_Inputs.size() == 1);
1843  CHECK(queueDescriptor.m_Outputs.size() == 1);
1844 
1845  return workload;
1846 }
1847 
1848 template <typename LogSoftmaxWorkload, armnn::DataType DataType>
1849 std::unique_ptr<LogSoftmaxWorkload> CreateLogSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1850  armnn::Graph& graph)
1851 {
1852  // Create the layer we're testing.
1853  LogSoftmaxDescriptor logSoftmaxDescriptor;
1854  // Set Axis to -1 if CL or Neon until further Axes are supported.
1855  if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
1856  {
1857  logSoftmaxDescriptor.m_Axis = -1;
1858  }
1859 
1860  Layer* const layer = graph.AddLayer<LogSoftmaxLayer>(logSoftmaxDescriptor, "layer");
1861  // Create extra layers.
1862  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1863  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1864 
1865  // Connect up
1866  armnn::TensorInfo tensorInfo({4, 1}, DataType);
1867 
1868  Connect(input, layer, tensorInfo);
1869  Connect(layer, output, tensorInfo);
1870  CreateTensorHandles(graph, factory);
1871 
1872  // Make the workload and check it.
1873  auto workload = MakeAndCheckWorkload<LogSoftmaxWorkload>(*layer, factory);
1874 
1875  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1876  CHECK(queueDescriptor.m_Inputs.size() == 1);
1877  CHECK(queueDescriptor.m_Outputs.size() == 1);
1878 
1879  // Return so we can do extra, backend-specific tests.
1880  return workload;
1881 }
1882 
1883 template <typename L2NormalizationWorkload, armnn::DataType DataType>
1884 std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1885  armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
1886 {
1887  // Creates the layer we're testing.
1888  L2NormalizationDescriptor layerDesc;
1889  layerDesc.m_DataLayout = dataLayout;
1890 
1891  Layer* const layer = graph.AddLayer<L2NormalizationLayer>(layerDesc, "l2norm");
1892 
1893  // Creates extra layers.
1894  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1895  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1896 
1897  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1898  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1899  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1900  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1901 
1902  // Connects up.
1903  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1904  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1905  Connect(input, layer, inputTensorInfo);
1906  Connect(layer, output, outputTensorInfo);
1907  CreateTensorHandles(graph, factory);
1908 
1909  // Makes the workload and checks it.
1910  auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, factory);
1911 
1912  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1913  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1914  CHECK(queueDescriptor.m_Inputs.size() == 1);
1915  CHECK(queueDescriptor.m_Outputs.size() == 1);
1916 
1917  // Returns so we can do extra, backend-specific tests.
1918  return workload;
1919 }
1920 
1921 template <typename ReshapeWorkload, armnn::DataType DataType>
1922 std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory,
1923  armnn::Graph& graph)
1924 {
1925  // Creates the layer we're testing.
1926  TensorShape outputShape({ 1, 4 });
1927  ReshapeDescriptor reshapeDesc;
1928  reshapeDesc.m_TargetShape = outputShape;
1929  Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer");
1930 
1931  // Creates extra layers.
1932  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1933  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1934 
1935  // Connects up.
1936  armnn::TensorInfo inputTensorInfo({ 4, 1 }, DataType);
1937  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1938  Connect(input, layer, inputTensorInfo);
1939  Connect(layer, output, outputTensorInfo);
1940  CreateTensorHandles(graph, factory);
1941 
1942  // Makes the workload and checks it.
1943  auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, factory);
1944 
1945  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
1946  CHECK(queueDescriptor.m_Inputs.size() == 1);
1947  CHECK(queueDescriptor.m_Outputs.size() == 1);
1948 
1949  // Returns so we can do extra, backend-specific tests.
1950  return workload;
1951 }
1952 
1953 template <typename ConvertFp16ToFp32Float32Workload>
1954 std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest(
1955  armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1956 {
1957  // Creates the layer we're testing.
1958  ConvertFp16ToFp32Layer* const layer = graph.AddLayer<ConvertFp16ToFp32Layer>("Fp16ToFp32Converter");
1959 
1960  // Creates extra layers.
1961  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1962  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1963 
1964  // Connects up.
1965  armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1966  armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1967  Connect(input, layer, inputTensorInfo);
1968  Connect(layer, output, outputTensorInfo);
1969  CreateTensorHandles(graph, factory);
1970 
1971  // Makes the workload and checks it.
1972  auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, factory);
1973 
1974  ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
1975  CHECK(queueDescriptor.m_Inputs.size() == 1);
1976  CHECK(queueDescriptor.m_Outputs.size() == 1);
1977 
1978  // Returns so we can do extra, backend-specific tests.
1979  return workload;
1980 }
1981 
1982 template <typename ConvertFp32ToFp16Float16Workload>
1983 std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest(
1984  armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1985 {
1986  // Creates the layer we're testing.
1987  ConvertFp32ToFp16Layer* const layer = graph.AddLayer<ConvertFp32ToFp16Layer>("Fp32ToFp16Converter");
1988 
1989  // Creates extra layers.
1990  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1991  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1992 
1993  // Connects up.
1994  armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1995  armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1996  Connect(input, layer, inputTensorInfo);
1997  Connect(layer, output, outputTensorInfo);
1998  CreateTensorHandles(graph, factory);
1999 
2000  // Makes the workload and checks it.
2001  auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, factory);
2002 
2003  ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
2004  CHECK(queueDescriptor.m_Inputs.size() == 1);
2005  CHECK(queueDescriptor.m_Outputs.size() == 1);
2006 
2007  // Returns so we can do extra, backend-specific tests.
2008  return workload;
2009 }
2010 
2011 template <typename MeanWorkload, armnn::DataType DataType>
2012 std::unique_ptr<MeanWorkload> CreateMeanWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
2013 {
2014  // Reduce along the first and second dimensions, and do not keep the reduced dimensions.
2015  MeanDescriptor descriptor({ 1, 2 }, false);
2016 
2017  // Creates the layer we're testing.
2018  Layer* const layer = graph.AddLayer<MeanLayer>(descriptor, "mean");
2019 
2020  // Creates extra layers.
2021  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
2022  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2023 
2024  // Connects up.
2025  armnn::TensorInfo inputTensorInfo({ 1, 3, 7, 4 }, DataType);
2026  armnn::TensorInfo outputTensorInfo({ 1, 4 }, DataType);
2027  Connect(input, layer, inputTensorInfo);
2028  Connect(layer, output, outputTensorInfo);
2029  CreateTensorHandles(graph, factory);
2030 
2031  // Makes the workload and checks it.
2032  auto workload = MakeAndCheckWorkload<MeanWorkload>(*layer, factory);
2033 
2034  MeanQueueDescriptor queueDescriptor = workload->GetData();
2035  CHECK(queueDescriptor.m_Parameters.m_Axis == descriptor.m_Axis);
2036  CHECK(queueDescriptor.m_Parameters.m_KeepDims == descriptor.m_KeepDims);
2037  CHECK(queueDescriptor.m_Inputs.size() == 1);
2038  CHECK(queueDescriptor.m_Outputs.size() == 1);
2039 
2040  // Returns so we can do extra, backend-specific tests.
2041  return workload;
2042 }
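// A worked check of the reduction: axes {1, 2} of the {1, 3, 7, 4} input are averaged
// away, and with m_KeepDims == false they are dropped rather than kept as size-1
// dimensions, so the output is {1, 4} (it would be {1, 1, 1, 4} with keepDims enabled).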
2043 
2044 template<typename ConcatWorkload, armnn::DataType DataType>
2045 std::unique_ptr<ConcatWorkload> CreateConcatWorkloadTest(armnn::IWorkloadFactory &factory,
2046  armnn::Graph &graph,
2047  const armnn::TensorShape &outputShape,
2048  unsigned int concatAxis)
2049 {
2050  armnn::TensorInfo inputTensorInfo({ 2, 3, 2, 5 }, DataType);
2051  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2052 
2053  // Constructs the graph.
2054  Layer* const input0 = graph.AddLayer<InputLayer>(0, "input0");
2055  Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
2056  armnn::OriginsDescriptor descriptor;
2057 
2058  std::vector<armnn::TensorShape> inputShapes{{ 2, 3, 2, 5 }, { 2, 3, 2, 5 }};
2059 
2060  descriptor = CreateDescriptorForConcatenation(inputShapes.begin(),
2061  inputShapes.end(),
2062  concatAxis);
2063 
2064  // create concat layer
2065  Layer* const concat = graph.AddLayer<ConcatLayer>(descriptor, "concat");
2066  CHECK(concat);
2067 
2068  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2069 
2070  // Adds connections.
2071  // connect input0 to concat
2072  Connect(input0, concat, inputTensorInfo, 0, 0);
2073  // connect input1 to concat
2074  Connect(input1, concat, inputTensorInfo, 0, 1);
2075  // connect concat to output
2076  Connect(concat, output, outputTensorInfo, 0, 0);
2077 
2078  // create tensor handles
2079  CreateTensorHandles(graph, factory);
2080 
2081  // create concat workload
2082  auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
2083  CHECK(workloadConcat);
2084 
2085  return workloadConcat;
2086 }
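// CreateDescriptorForConcatenation derives the view origins from the input shapes and the
// concat axis. For the two {2, 3, 2, 5} inputs above with concatAxis == 1, for instance,
// the expected values are (a sketch, assuming the running-offset rule along the axis):
//
//     output shape: {2, 6, 2, 5}
//     view origins: (0, 0, 0, 0) and (0, 3, 0, 0)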
2087 
2088 template <typename PreCompiledWorkload, armnn::DataType dataType>
2089 std::pair<armnn::IOptimizedNetworkPtr, std::unique_ptr<PreCompiledWorkload>> CreatePreCompiledWorkloadTest(
2090  armnn::IWorkloadFactory& factory,
2091  armnn::Graph& graph,
2092  bool biasEnabled = false)
2093 {
2094  IgnoreUnused(graph);
2095 
2096  // build up the structure of the network
2097  armnn::INetworkPtr net(armnn::INetwork::Create());
2098 
2099  // Add an input layer
2100  armnn::IConnectableLayer* const inputLayer = net->AddInputLayer(0, "input layer");
2101  CHECK(inputLayer);
2102 
2103  // ArmNN weights tensor shape is OIHW (out channels, in channels, height, width) for NCHW
2104  // ArmNN weights tensor shape is OHWI (out channels, height, width, in channels) for NHWC
2105  // this test is using NHWC, so the weights shape is OHWI
2106  TensorInfo weightsTensorInfo(TensorShape({16, 1, 1, 16}), dataType, 0.9f, 0, true);
2107  unsigned int weightsLength = weightsTensorInfo.GetNumElements();
2108 
2109  using WeightType = armnn::ResolveType<dataType>;
2110  std::vector<WeightType> convWeightsData(weightsLength);
2111  for (unsigned int i = 0; i < weightsLength; ++i)
2112  {
2113  convWeightsData[i] = static_cast<WeightType>(i);
2114  }
2115 
2116  armnn::ConstTensor weights(weightsTensorInfo, convWeightsData);
2117 
2118  // Add a layer that can be used in the PreCompiled layer
2119  armnn::Convolution2dDescriptor convDesc2d;
2120  convDesc2d.m_StrideX = 1;
2121  convDesc2d.m_StrideY = 1;
2122  convDesc2d.m_BiasEnabled = biasEnabled;
2123  convDesc2d.m_DataLayout = armnn::DataLayout::NHWC;
2124 
2125  armnn::IConnectableLayer* convLayer = nullptr;
2126  const std::string convLayerName("conv layer");
2127 
2128  if (biasEnabled)
2129  {
2130  constexpr armnn::DataType biasDataType = ( dataType == armnn::DataType::QAsymmU8) ?
2131  armnn::DataType::Signed32 : armnn::DataType::Float32;
2132 
2133  TensorInfo biasTensorInfo(TensorShape({16}), biasDataType, 0.9f * 0.9f, 0, true);
2134  unsigned int biasLength = biasTensorInfo.GetNumElements();
2135 
2136  using BiasType = armnn::ResolveType<biasDataType>;
2137  std::vector<BiasType> biasData(biasLength);
2138  std::fill(biasData.begin(), biasData.end(), static_cast<BiasType>(0));
2139 
2140  armnn::ConstTensor biases(biasTensorInfo, biasData);
2141 
2142  // Create convolution layer with biases
2143  ARMNN_NO_DEPRECATE_WARN_BEGIN
2144  convLayer = net->AddConvolution2dLayer(convDesc2d,
2145  weights,
2146  Optional<ConstTensor>(biases),
2147  convLayerName.c_str());
2148  ARMNN_NO_DEPRECATE_WARN_END
2149  }
2150  else
2151  {
2152  // Create convolution layer without biases
2153  ARMNN_NO_DEPRECATE_WARN_BEGIN
2154  convLayer = net->AddConvolution2dLayer(convDesc2d,
2155  weights,
2156  EmptyOptional(),
2157  convLayerName.c_str());
2158  ARMNN_NO_DEPRECATE_WARN_END
2159  }
2160 
2161  CHECK(convLayer);
2162 
2163  // Add an output layer
2164  armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output layer");
2165  CHECK(outputLayer);
2166 
2167  // set the tensors in the network (NHWC format)
2168  TensorInfo inputTensorInfo(TensorShape({ 1, 16, 16, 16 }), dataType);
2169  if (dataType == armnn::DataType::QAsymmU8)
2170  {
2171  inputTensorInfo.SetQuantizationOffset(0);
2172  inputTensorInfo.SetQuantizationScale(0.9f);
2173  }
2174 
2175  TensorInfo outputTensorInfo(TensorShape({1, 16, 16, 16}), dataType);
2176  if (dataType == armnn::DataType::QAsymmU8)
2177  {
2178  outputTensorInfo.SetQuantizationOffset(0);
2179  outputTensorInfo.SetQuantizationScale(0.9f);
2180  }
2181 
2182  // Connect the layers
2183  inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
2184  inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
2185 
2186  convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
2187  convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
2188 
2189  // Optimize the network for the backend supported by the factory
2190  std::vector<armnn::BackendId> backends = {factory.GetBackendId()};
2191  armnn::IRuntime::CreationOptions options;
2192  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
2193  armnn::OptimizerOptions optimizerOptions;
2194  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec(),
2195  optimizerOptions);
2196  CHECK(optimizedNet != nullptr);
2197 
2198  // Find the PreCompiled layer in the optimised graph
2199  armnn::Graph& optimisedGraph = GetGraphForTesting(optimizedNet.get());
2200  Layer* preCompiledLayer = nullptr;
2201  for (auto& layer : optimisedGraph)
2202  {
2203  if (layer->GetType() == LayerType::PreCompiled)
2204  {
2205  preCompiledLayer = layer;
2206  }
2207  }
2208  CHECK(preCompiledLayer != nullptr);
2209 
2210  // Create the TensorHandles.
2211  CreateTensorHandles(optimisedGraph, factory);
2212 
2213  // Make the workload and check it.
2214  auto workload = MakeAndCheckWorkload<PreCompiledWorkload>(*preCompiledLayer, factory);
2215 
2216  PreCompiledQueueDescriptor queueDescriptor = workload->GetData();
2217  CHECK(queueDescriptor.m_Inputs.size() == 1);
2218  CHECK(queueDescriptor.m_Outputs.size() == 1);
2219 
2220  // Returns the workload so we can do extra, backend-specific tests.
2221  // NOTE: We need to return the optimised network as well; otherwise it goes
2222  // out of scope and the tensor handles are destroyed.
2223  return std::make_pair(std::move(optimizedNet), std::move(workload));
2224 }
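// Unlike the other helpers, this one goes through the full INetwork -> Optimize pipeline
// so that a capable backend can substitute a PreCompiled layer. A usage sketch (the
// factory and workload types are illustrative; only backends supporting pre-compilation
// will produce such a layer):
//
//     armnn::Graph graph;
//     SampleWorkloadFactory factory;  // hypothetical
//     auto result = CreatePreCompiledWorkloadTest<SamplePreCompiledWorkload,
//                                                 armnn::DataType::QAsymmU8>(factory, graph);
//     // result.first (the optimized network) must outlive result.second (the workload).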
2225 
2226 template<typename ConstantWorkload, armnn::DataType DataType>
2227 std::unique_ptr<ConstantWorkload> CreateConstantWorkloadTest(armnn::IWorkloadFactory& factory,
2228  armnn::Graph& graph,
2229  const armnn::TensorShape& outputShape)
2230 {
2231  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2232 
2233  // create constant layer
2234  auto constant = graph.AddLayer<ConstantLayer>("constant");
2235  CHECK(constant);
2236  constant->m_LayerOutput = std::make_unique<ScopedTensorHandle>(outputTensorInfo);
2237 
2238  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2239 
2240  // Adds connections.
2241  // connect constant to output
2242  Connect(constant, output, outputTensorInfo, 0, 0);
2243 
2244  // create tensor handles
2245  CreateTensorHandles(graph, factory);
2246 
2247  // create Constant workload
2248  auto workloadConstant = MakeAndCheckWorkload<ConstantWorkload>(*constant, factory);
2249  CHECK(workloadConstant);
2250 
2251  return workloadConstant;
2252 }
2253 
2254 template <typename PreluWorkload>
2255 std::unique_ptr<PreluWorkload> CreatePreluWorkloadTest(armnn::IWorkloadFactory& factory,
2256  armnn::Graph& graph,
2257  const armnn::TensorShape& inputShape,
2258  const armnn::TensorShape& alphaShape,
2259  const armnn::TensorShape& outputShape,
2260  armnn::DataType dataType)
2261 {
2262  // Creates the PReLU layer
2263  Layer* const layer = graph.AddLayer<PreluLayer>("prelu");
2264  CHECK(layer != nullptr);
2265 
2266  // Creates extra layers
2267  Layer* const input = graph.AddLayer<InputLayer> (0, "input");
2268  Layer* const alpha = graph.AddLayer<InputLayer> (1, "alpha");
2269  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2270  CHECK(input != nullptr);
2271  CHECK(alpha != nullptr);
2272  CHECK(output != nullptr);
2273 
2274  // Connects up
2275  armnn::TensorInfo inputTensorInfo (inputShape, dataType);
2276  armnn::TensorInfo alphaTensorInfo (alphaShape, dataType);
2277  armnn::TensorInfo outputTensorInfo(outputShape, dataType);
2278  Connect(input, layer, inputTensorInfo, 0, 0);
2279  Connect(alpha, layer, alphaTensorInfo, 0, 1);
2280  Connect(layer, output, outputTensorInfo, 0, 0);
2281  CreateTensorHandles(graph, factory);
2282 
2283  // Makes the workload and checks it
2284  auto workload = MakeAndCheckWorkload<PreluWorkload>(*layer, factory);
2285 
2286  PreluQueueDescriptor queueDescriptor = workload->GetData();
2287  CHECK(queueDescriptor.m_Inputs.size() == 2);
2288  CHECK(queueDescriptor.m_Outputs.size() == 1);
2289 
2290  // Returns so we can do extra, backend-specific tests.
2291  return workload;
2292 }
2293 
2294 template <typename SpaceToDepthWorkload, armnn::DataType DataType>
2295 std::unique_ptr<SpaceToDepthWorkload> CreateSpaceToDepthWorkloadTest(armnn::IWorkloadFactory& factory,
2296  armnn::Graph& graph)
2297 {
2298  SpaceToDepthDescriptor desc;
2299  desc.m_BlockSize = 2;
2300  Layer* const layer = graph.AddLayer<SpaceToDepthLayer>(desc, "spaceToDepth");
2301 
2302  // Creates extra layers.
2303  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
2304  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2305 
2306  // Connects up.
2307  armnn::TensorInfo inputTensorInfo({ 1, 2, 2, 1 }, DataType);
2308  armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 4 }, DataType);
2309 
2310  Connect(input, layer, inputTensorInfo);
2311  Connect(layer, output, outputTensorInfo);
2312 
2313  CreateTensorHandles(graph, factory);
2314 
2315  // Makes the workload and checks it.
2316  auto workload = MakeAndCheckWorkload<SpaceToDepthWorkload>(*layer, factory);
2317 
2318  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
2319  CHECK(queueDescriptor.m_Inputs.size() == 1);
2320  CHECK(queueDescriptor.m_Outputs.size() == 1);
2321 
2322  return workload;
2323 }
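// A worked check of the shapes: with m_BlockSize == 2 each 2x2 spatial block is folded
// into the channel dimension, so H and W shrink by the block size and C grows by its
// square: {1, 2, 2, 1} (NHWC) -> {1, 2/2, 2/2, 1 * 2 * 2} = {1, 1, 1, 4}.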
2324 
2325 template <typename StackWorkload, armnn::DataType DataType>
2326 std::unique_ptr<StackWorkload> CreateStackWorkloadTest(armnn::IWorkloadFactory& factory,
2327  armnn::Graph& graph,
2328  const armnn::TensorShape& inputShape,
2329  const armnn::TensorShape& outputShape,
2330  unsigned int axis,
2331  unsigned int numInputs)
2332 {
2333  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
2334  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2335 
2336  // Constructs the Stack layer.
2337  armnn::StackDescriptor descriptor(axis, numInputs, inputShape);
2338  Layer* const stackLayer = graph.AddLayer<StackLayer>(descriptor, "stack");
2339  CHECK(stackLayer != nullptr);
2340 
2341  // Constructs layer inputs and output.
2342  std::vector<Layer*> inputs;
2343  for (unsigned int i=0; i<numInputs; ++i)
2344  {
2345  inputs.push_back(graph.AddLayer<InputLayer>(
2346  static_cast<int>(i),
2347  ("input" + std::to_string(i)).c_str()
2348  ));
2349  CHECK(inputs[i] != nullptr);
2350  }
2351  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2352  CHECK(output != nullptr);
2353 
2354  // Adds connections.
2355  for (unsigned int i=0; i<numInputs; ++i)
2356  {
2357  Connect(inputs[i], stackLayer, inputTensorInfo, 0, i);
2358  }
2359  Connect(stackLayer, output, outputTensorInfo, 0, 0);
2360 
2361  CreateTensorHandles(graph, factory);
2362 
2363  auto stackWorkload = MakeAndCheckWorkload<StackWorkload>(*stackLayer, factory);
2364  StackQueueDescriptor queueDescriptor = stackWorkload->GetData();
2365  CHECK(queueDescriptor.m_Inputs.size() == numInputs);
2366  CHECK(queueDescriptor.m_Outputs.size() == 1);
2367 
2368  return stackWorkload;
2369 }
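// The StackDescriptor arguments fully determine the output: a new dimension of size
// numInputs is inserted at position `axis` into inputShape. For example, stacking
// numInputs == 2 tensors of shape {3, 4} at axis == 0 yields {2, 3, 4}; at axis == 1
// it yields {3, 2, 4}.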
2370 
2371 } // Anonymous namespace
A layer that the constant data can be bound to.
std::shared_ptr< ConstTensorHandle > m_ForgetGateBias
A unique pointer to represent 1D weights tensor with dimensions [num_units].
std::shared_ptr< ConstTensorHandle > m_OutputGateBias
A unique pointer to represent 1D weights tensor with dimensions [num_units].
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
bool m_ProjectionEnabled
Enable/disable the projection layer.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
This layer represents a split operation.
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:49
virtual const BackendId & GetBackendId() const =0
LstmBasicParameters m_BasicParameters
Definition: LstmLayer.hpp:20
This layer represents a batch normalization operation.
A ViewsDescriptor for the SplitterLayer.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:66
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
std::shared_ptr< ConstTensorHandle > m_CellToForgetWeights
A unique pointer to represent 1D weights tensor with dimensions [num_units].
DataLayout
Definition: Types.hpp:62
unsigned int GetWidthIndex() const
float m_K
Kappa value used for the across channel normalization equation.
int m_Axis
Scalar, defaulted to the last index (-1), specifying the dimension the activation will be performed o...
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
float m_ClippingThresProj
Clipping threshold value for the projection.
A ReshapeDescriptor for the ReshapeLayer.
std::shared_ptr< ConstTensorHandle > m_OutputGateBias
A unique pointer to represent 1D bias tensor with dimensions [num_units] (int32). ...
Definition: QLstmLayer.hpp:35
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
This layer represents a depthwise convolution 2d operation.
std::shared_ptr< ConstTensorHandle > m_LayerOutput
LayerT * AddLayer(Args &&... args)
Adds a new layer, of type LayerType, to the graph constructed with the arguments passed.
Definition: Graph.hpp:425
bool m_TransposeWeightMatrix
Enable/disable transpose weight matrix.
std::vector< BackendOptions > ModelOptions
uint32_t m_PoolWidth
Pooling width value.
bool m_PeepholeEnabled
Enable/disable peephole.
A Convolution2dDescriptor for the Convolution2dLayer.
float m_Alpha
Alpha value for the normalization equation.
uint32_t m_PadLeft
Padding left value in the width dimension.
This layer converts data type Float 16 to Float 32.
float m_HiddenStateScale
Hidden State quantization scale.
float m_OutputIntermediateScale
Output intermediate quantization scale.
ResizeMethod m_Method
The Interpolation method to use (Bilinear, NearestNeighbor).
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:33
std::shared_ptr< ConstTensorHandle > m_ForgetLayerNormWeights
A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16).
Definition: QLstmLayer.hpp:71
float m_Eps
Value to add to the variance. Used to avoid dividing by zero.
This layer represents a SpaceToDepth operation.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
This layer represents a reshape operation.
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:79
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
std::shared_ptr< ConstTensorHandle > m_InputToOutputWeights
A unique pointer to represent 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8)...
Definition: QLstmLayer.hpp:21
This layer represents an activation operation with the specified activation function.
uint32_t m_PadTop
Padding top value in the height dimension.
std::shared_ptr< ConstTensorHandle > m_Mean
A unique pointer to store Mean values.
uint32_t m_PadRight
Padding right value in the width dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Copyright (c) 2021 ARM Limited and Contributors.
std::shared_ptr< ConstTensorHandle > m_InputToCellWeights
A unique pointer to represent 2D weights tensor with dimensions [input_size, num_units].
This layer represents a LSTM operation.
Definition: LstmLayer.hpp:16
void IgnoreUnused(Ts &&...)
void SetBackendId(const BackendId &id)
Definition: Layer.hpp:276
A SpaceToDepthDescriptor for the SpaceToDepthLayer.
A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
std::shared_ptr< ConstTensorHandle > m_InputToCellWeights
A unique pointer to represent 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8)...
Definition: QLstmLayer.hpp:19
std::shared_ptr< ConstTensorHandle > m_Beta
A unique pointer to store Beta values.
unsigned int GetHeightIndex() const
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
QLstmOptLayerNormParameters m_LayerNormParameters
Definition: QLstmLayer.hpp:87
NormalizationAlgorithmMethod m_NormMethodType
Normalization method algorithm to use (LocalBrightness, LocalContrast).
This layer represents a elementwiseUnary operation.
A ResizeBilinearDescriptor for the ResizeBilinearLayer.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
A StackDescriptor for the StackLayer.
std::shared_ptr< ConstTensorHandle > m_CellBias
A unique pointer to represent 1D bias tensor with dimensions [num_units] (int32). ...
Definition: QLstmLayer.hpp:33
TensorShape m_TargetShape
Target shape value.
std::shared_ptr< ConstTensorHandle > m_CellToOutputWeights
A unique pointer to represent 1D weights tensor with dimensions [num_units].
uint32_t m_PoolHeight
Pooling height value.
uint32_t m_PadTop
Padding top value in the height dimension.
std::shared_ptr< ConstTensorHandle > m_InputToForgetWeights
A unique pointer to represent 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8)...
Definition: QLstmLayer.hpp:17
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
std::shared_ptr< ConstTensorHandle > m_RecurrentToCellWeights
A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units].
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
A layer user-provided data can be bound to (e.g. inputs, outputs).
Definition: OutputLayer.hpp:13
std::shared_ptr< ConstTensorHandle > m_CellBias
A unique pointer to represent 1D weights tensor with dimensions [num_units].
bool m_LayerNormEnabled
Enable/disable layer normalization.
DataType
Definition: Types.hpp:48
This layer represents a fully connected operation.
An LstmDescriptor for the LstmLayer.
uint32_t m_PadRight
Padding right value in the width dimension.
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
uint32_t m_PadTop
Padding top value in the height dimension.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1847
std::shared_ptr< ConstTensorHandle > m_CellLayerNormWeights
A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16).
Definition: QLstmLayer.hpp:73
This layer represents a QuantizedLstm operation.
This layer represents a log softmax operation.
std::shared_ptr< ConstTensorHandle > m_ForgetGateBias
A unique pointer to represent 1D bias tensor with dimensions [num_units] (int32). ...
Definition: QLstmLayer.hpp:31
A L2NormalizationDescriptor for the L2NormalizationLayer.
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
An OriginsDescriptor for the ConcatLayer.
float m_ProjectionClip
Clipping threshold value for the projection.
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
This layer represents a stack operation.
Definition: StackLayer.hpp:13
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
float m_InputIntermediateScale
Input intermediate quantization scale.
This layer represents a merge operation.
Definition: ConcatLayer.hpp:13
This layer represents a softmax operation.
uint32_t m_TargetWidth
Target width value.
bool m_PeepholeEnabled
Enable/disable peephole.
This layer represents a BatchToSpaceNd operation.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
A QLstmDescriptor for the QLstmLayer.
std::shared_ptr< ConstTensorHandle > m_RecurrentToOutputWeights
A unique pointer to represent 2D weights tensor with dimensions [output_size, num_units].
std::shared_ptr< ConstTensorHandle > m_Gamma
A unique pointer to store Gamma values.
GPU Execution: OpenCL: ArmCompute.
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
ArmNN performs an optimization on each model/network before it gets loaded for execution.
Definition: INetwork.hpp:137
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
min(a, max(b, input)) ReLu1 & ReLu6.
std::shared_ptr< ConstTensorHandle > m_OutputLayerNormWeights
A unique pointer to represent 1D weights tensor with dimensions [num_units] (QSymmS16).
Definition: QLstmLayer.hpp:75
std::shared_ptr< ConstTensorHandle > m_Variance
A unique pointer to store Variance values.
uint32_t m_TargetHeight
Target height value.
uint32_t m_ActivationFunc
The activation function to use.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents a normalization operation.
This layer represents a pooling 2d operation.
std::shared_ptr< ConstTensorHandle > m_Bias
A unique pointer to store Bias values.
std::shared_ptr< ConstTensorHandle > m_RecurrentToCellWeights
A unique pointer to represent 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8)...
Definition: QLstmLayer.hpp:26
float m_ClippingThresCell
Clipping threshold value for the cell state.
This layer converts data type Float 32 to Float 16.
unsigned int m_BlockSize
Scalar specifying the input block size. It must be >= 1.
DataType GetBiasDataType(DataType inputDataType)
void SetAdditionalInfoForObject(const AdditionalInfoObjectPtr &additionalInfo)
Definition: Layer.hpp:358
float m_ForgetIntermediateScale
Forget intermediate quantization scale.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
This layer represents an addition operation.
QLstmBasicParameters m_BasicParameters
Definition: QLstmLayer.hpp:83
LstmOptPeepholeParameters m_PeepholeParameters
Definition: LstmLayer.hpp:23
std::shared_ptr< ConstTensorHandle > m_RecurrentToOutputWeights
A unique pointer to represent 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8)...
Definition: QLstmLayer.hpp:28
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
Definition: TestUtils.cpp:49
NormalizationAlgorithmChannel m_NormChannelType
Normalization channel algorithm to use (Across, Within).
This layer represents a QLstm operation.
Definition: QLstmLayer.hpp:79
float m_CellClip
Clipping threshold value for the cell state.
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
Definition: Descriptors.hpp:61
This layer represents a subtraction operation.
bool m_CifgEnabled
Enable/disable CIFG (coupled input & forget gate).
EmptyOptional is used to initialize the Optional class in case we want to have a default value for an Optional.
Definition: Optional.hpp:32
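A minimal sketch:
    armnn::Optional<armnn::DataType> dataType = armnn::EmptyOptional();
    if (!dataType.has_value())
    {
        // No data type was supplied; the caller falls back to a default.
    }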
A ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
std::shared_ptr< ConstTensorHandle > m_InputToForgetWeights
A shared pointer to represent a 2D weights tensor with dimensions [input_size, num_units].
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
std::shared_ptr< ConstTensorHandle > m_RecurrentToForgetWeights
A shared pointer to represent a 2D weights tensor with dimensions [output_size, num_units].
This layer represents a L2 normalization operation.
std::shared_ptr< ConstTensorHandle > m_Bias
A shared pointer to store Bias values.
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store Weight values.
CPU Execution: NEON: ArmCompute.
bool m_ProjectionEnabled
Enable/disable the projection layer.
OutputShapeRounding m_OutputShapeRounding
The rounding method for the output shape (Floor, Ceiling).
A layer user-provided data can be bound to (e.g. inputs, outputs).
Definition: InputLayer.hpp:13
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
A MeanDescriptor for the MeanLayer.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514
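A minimal sketch, assuming the common pattern of wrapping weights in a ConstTensor; recent ArmNN versions expect the backing TensorInfo to be marked constant first:
    armnn::TensorInfo weightsInfo({16, 3, 3, 3}, armnn::DataType::Float32);
    weightsInfo.SetConstant();
    std::vector<float> weightsData(weightsInfo.GetNumElements(), 0.0f);
    armnn::ConstTensor weights(weightsInfo, weightsData);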
UnaryOperation
Definition: Types.hpp:124
DataType GetDataType() const
Definition: Layer.cpp:313
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
This layer represents a convolution 2d operation.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:489
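A minimal sketch of an asymmetric 8-bit tensor, where a real value is recovered as (quantized - offset) * scale:
    armnn::TensorInfo info({1, 10}, armnn::DataType::QAsymmU8);
    info.SetQuantizationScale(0.05f);
    info.SetQuantizationOffset(128); // quantized value 128 represents 0.0f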
void Connect(armnn::IConnectableLayer *from, armnn::IConnectableLayer *to, const armnn::TensorInfo &tensorInfo, unsigned int fromIndex, unsigned int toIndex)
Definition: TestUtils.cpp:14
OriginsDescriptor CreateDescriptorForConcatenation(TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors.
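For instance, concatenating two [2, 3] tensors along dimension 0 (a sketch; any iterator range over TensorShape works):
    std::vector<armnn::TensorShape> shapes = { armnn::TensorShape({2, 3}),
                                               armnn::TensorShape({2, 3}) };
    armnn::OriginsDescriptor concatDesc =
        armnn::CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), 0);
    // Suitable for a ConcatLayer producing a [4, 3] output.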
Graph & TopologicalSort()
Sorts layers in topological order and returns this graph.
Definition: Graph.hpp:184
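A minimal sketch, assuming 'graph' is an armnn::Graph and <iostream> is included; iteration visits producers before their consumers:
    for (auto&& layer : graph.TopologicalSort())
    {
        // GetLayerTypeAsCString comes from armnn's TypesUtils.hpp.
        std::cout << armnn::GetLayerTypeAsCString(layer->GetType()) << std::endl;
    }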
This layer represents a mean operation.
Definition: MeanLayer.hpp:14
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:241
virtual int Connect(IInputSlot &destination)=0
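A minimal sketch of the slot API, assuming 'input' and 'activation' are IConnectableLayer pointers already added to the same network:
    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(
        armnn::TensorInfo({1, 4}, armnn::DataType::Float32));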
Krichevsky 2012: Local Brightness Normalization.
A Pooling2dDescriptor for the Pooling2dLayer.
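As a sketch tying together the m_PoolType, stride and rounding entries on this page, a 2x2 max pool with stride 2 and no padding:
    armnn::Pooling2dDescriptor poolDesc;
    poolDesc.m_PoolType   = armnn::PoolingAlgorithm::Max;
    poolDesc.m_PoolWidth  = 2;
    poolDesc.m_PoolHeight = 2;
    poolDesc.m_StrideX    = 2;
    poolDesc.m_StrideY    = 2;
    poolDesc.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
    poolDesc.m_DataLayout = armnn::DataLayout::NHWC;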
A NormalizationDescriptor for the NormalizationLayer.
std::shared_ptr< ConstTensorHandle > m_RecurrentToForgetWeights
A shared pointer to represent a 2D weights tensor with dimensions [num_units, outputSize] (QSymmS8)...
Definition: QLstmLayer.hpp:24
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
This layer represents a multiplication operation.
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry &registry, const IWorkloadFactory &factory, const bool IsMemoryManaged=true)
Definition: Layer.cpp:279
virtual std::unique_ptr< IWorkload > CreateWorkload(const IWorkloadFactory &factory) const =0
float m_CellIntermediateScale
Cell intermediate quantization scale.
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:476
float m_B
Beta lower bound value used by the activation functions (BoundedReLu, Linear, TanH).
Definition: Descriptors.hpp:63
A SoftmaxDescriptor for the SoftmaxLayer.
float m_Beta
Beta value for the normalization equation.
bool m_CifgEnabled
Enable/disable CIFG (coupled input & forget gate).
uint32_t m_NormSize
Depth radius value.
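The normalization entries on this page (m_NormChannelType, m_Beta, m_NormSize) combine as in this sketch of cross-channel local-brightness normalization; the alpha and beta values are illustrative only:
    armnn::NormalizationDescriptor normDesc;
    normDesc.m_NormChannelType = armnn::NormalizationAlgorithmChannel::Across;
    normDesc.m_NormMethodType  = armnn::NormalizationAlgorithmMethod::LocalBrightness;
    normDesc.m_NormSize = 5;    // depth radius
    normDesc.m_Alpha    = 1.0f;
    normDesc.m_Beta     = 0.75f;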
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
Definition: Descriptors.hpp:59
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
Depthwise Convolution 2D layer workload data.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
uint32_t m_PadLeft
Padding left value in the width dimension.
std::shared_ptr< T > GetAdditionalInformation() const
Definition: Layer.hpp:353
This layer represents a resize operation.
Definition: ResizeLayer.hpp:13
std::shared_ptr< ConstTensorHandle > m_InputToOutputWeights
A shared pointer to represent a 2D weights tensor with dimensions [input_size, num_units].
LayerType
When adding a new layer, also adapt the LastLayer enum value in the enum class LayerType below.
Definition: Types.hpp:467
uint32_t m_PadRight
Padding right value in the width dimension.
int32_t m_HiddenStateZeroPoint
Hidden State zero point.
bool m_ConstantWeights
Enable/disable constant weights and biases.