ArmNN 21.08
CreateWorkload.hpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "TestUtils.hpp"

#include <Graph.hpp>
#include <Network.hpp>
#include <ResolveType.hpp>

#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <backendsCommon/TensorHandle.hpp>
#include <backendsCommon/WorkloadData.hpp>
#include <backendsCommon/WorkloadFactory.hpp>

#include <doctest/doctest.h>

#include <utility>

using namespace armnn;

namespace
{

using namespace std;

// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
template<typename Workload>
std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer,
                                               const IWorkloadFactory& factory,
                                               const ModelOptions& modelOptions = {})
{
    std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
    CHECK_MESSAGE(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
                  "Cannot convert to derived class");
    std::string reasonIfUnsupported;
    layer.SetBackendId(factory.GetBackendId());
    CHECK(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported, modelOptions));
    return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
}

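// Every helper below follows the same pattern around MakeAndCheckWorkload (a sketch
// distilled from the functions in this file; ConcreteWorkload/ConcreteQueueDescriptor
// stand in for a real pair such as the activation workload and
// ActivationQueueDescriptor used further down):
//
//     auto workload = MakeAndCheckWorkload<ConcreteWorkload>(*layer, factory);
//     ConcreteQueueDescriptor queueDescriptor = workload->GetData();
//     // CHECKs on queueDescriptor.m_Inputs / m_Outputs / m_Parameters follow.
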
// Helper function to create tensor handles for workloads, assuming they all use the same factory.
void CreateTensorHandles(armnn::Graph& graph,
                         armnn::IWorkloadFactory& factory)
{
    TensorHandleFactoryRegistry tmpRegistry;
    for (auto&& layer : graph.TopologicalSort())
    {
        layer->CreateTensorHandles(tmpRegistry, factory);
    }
}

/////////////////////////////////////////////////////////////////////////////////////////////
// The following functions are called by backendsCommon/test/CreateWorkload*.cpp
// They build very simple graphs, and then create a workload.
// Some checks are performed on the workload to ensure parameters have been passed correctly.
// They return the created workloads so that backend-specific checks can be performed.
/////////////////////////////////////////////////////////////////////////////////////////////

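// For orientation, a backend test file typically drives one of these helpers roughly
// as follows (a minimal sketch only; RefWorkloadFactory and RefActivationWorkload are
// assumed stand-ins for whichever backend is under test):
//
//     TEST_CASE("CreateActivationWorkload")
//     {
//         Graph graph;
//         RefWorkloadFactory factory;
//         auto workload = CreateActivationWorkloadTest<RefActivationWorkload,
//                                                      armnn::DataType::Float32>(factory, graph);
//         // Backend-specific checks on the returned workload go here.
//     }
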
template <typename ActivationWorkload, armnn::DataType DataType>
std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph)
{
    // Creates the layer we're testing.
    ActivationDescriptor layerDesc;
    layerDesc.m_Function = ActivationFunction::Abs;
    layerDesc.m_A = 3.5f;
    layerDesc.m_B = -10.0f;

    ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer");

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({1, 1}, DataType);

    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, factory);

    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK(queueDescriptor.m_Parameters.m_A == 3.5f);
    CHECK(queueDescriptor.m_Parameters.m_B == -10.0f);
    CHECK((queueDescriptor.m_Parameters.m_Function == ActivationFunction::Abs));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseWorkloadTest(armnn::IWorkloadFactory& factory,
                                                            armnn::Graph& graph)
{
    // Creates the layer we're testing.
    Layer* const layer = graph.AddLayer<LayerType>("layer");

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateSubtractionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                    armnn::Graph& graph)
{
    // Creates the layer we're testing.
    SubtractionLayer* const layer = graph.AddLayer<SubtractionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();

    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateMultiplicationWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    // Creates the layer we're testing.
    MultiplicationLayer* const layer = graph.AddLayer<MultiplicationLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    return workload; // Returns so we can do extra, backend-specific tests.
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateAdditionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph)
{
    // Creates the layer we're testing.
    AdditionLayer* const layer = graph.AddLayer<AdditionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->template GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseUnaryWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph,
                                                                 armnn::UnaryOperation op)
{
    ElementwiseUnaryDescriptor desc = ElementwiseUnaryDescriptor(op);
    Layer* const layer = graph.AddLayer<armnn::ElementwiseUnaryLayer>(desc, "layer");

    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    armnn::TensorInfo tensorInfo({ 2, 3 }, DataType);
    Connect(input, layer, tensorInfo, 0, 0);
    Connect(layer, output, tensorInfo, 0, 0);
    CreateTensorHandles(graph, factory);

    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
    DescriptorType queueDescriptor = workload->GetData();

    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph,
                                                                       DataLayout dataLayout = DataLayout::NCHW,
                                                                       const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType));
    layer->m_Bias   = std::make_unique<ScopedTensorHandle>(TensorInfo({2}, GetBiasDataType(DataType)));

    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType));
    Connect(layer, output, TensorInfo(outputShape, DataType));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo(weightShape, DataType)));
    CHECK((queueDescriptor.m_Bias->GetTensorInfo() ==
           TensorInfo({2}, GetBiasDataType(DataType))));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dFusedActivationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory,
    armnn::Graph& graph,
    DataLayout dataLayout = DataLayout::NCHW,
    const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType));
    layer->m_Bias   = std::make_unique<ScopedTensorHandle>(TensorInfo({2}, GetBiasDataType(DataType)));

    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType));
    Connect(layer, output, TensorInfo(outputShape, DataType));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo(weightShape, DataType)));
    CHECK((queueDescriptor.m_Bias->GetTensorInfo() ==
           TensorInfo({2}, GetBiasDataType(DataType))));
    CHECK(queueDescriptor.m_Inputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadFastMathTest(armnn::IWorkloadFactory& factory,
                                                                               armnn::Graph& graph,
                                                                               DataLayout dataLayout = DataLayout::NCHW,
                                                                               const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 0;
    layerDesc.m_PadRight = 0;
    layerDesc.m_PadTop = 0;
    layerDesc.m_PadBottom = 0;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_DataLayout = dataLayout;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = TensorShape{32, 32, 3, 3};
    TensorShape inputShape  = TensorShape{1, 32, 149, 149};
    TensorShape outputShape = TensorShape{1, 32, 147, 147};

    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo(weightShape, DataType));
    layer->m_Bias   = std::make_unique<ScopedTensorHandle>(TensorInfo({2}, GetBiasDataType(DataType)));

    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType));
    Connect(layer, output, TensorInfo(outputShape, DataType));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 0);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo(weightShape, DataType)));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename LstmWorkload>
std::unique_ptr<LstmWorkload> CreateLstmWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
    // This parameter setting is for withCifgWithPeepholeNoProjection.
    LstmDescriptor layerDesc;
    layerDesc.m_ActivationFunc = 4;
    layerDesc.m_ClippingThresCell = 0.0f;
    layerDesc.m_ClippingThresProj = 0.0f;
    layerDesc.m_CifgEnabled = true;
    layerDesc.m_PeepholeEnabled = true;
    layerDesc.m_ProjectionEnabled = false;

    LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer");
    unsigned int batchSize = 2;
    unsigned int inputSize = 2;
    unsigned int numUnits = 4;
    unsigned int outputSize = 4;

    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();
    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();

    if (layerDesc.m_PeepholeEnabled)
    {
        layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate();
        layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate();
    }

    // create input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");
    Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(3, "output");

    // connect up
    armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits * (layerDesc.m_CifgEnabled ? 3 : 4) },
                                                DataType::Float32);
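    // Worked example of the scratch buffer shape: with CIFG enabled the input gate is
    // omitted, so only 3 gate planes are staged rather than 4; here that gives
    // { batchSize, numUnits * 3 } = { 2, 12 }. Without CIFG it would be { 2, 16 }.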
    Connect(input, layer, lstmTensorInfo1, 0, 0);
    Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
    Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
    Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
    Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
    Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
    Connect(layer, output, lstmTensorInfo3, 3, 0);

    CreateTensorHandles(graph, factory);

    // make the workload and check it
    auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, factory);
    LstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_ActivationFunc == 4);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresCell == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresProj == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 4);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == TensorInfo({ numUnits, inputSize },
                                                                                 DataType::Float32)));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == TensorInfo({ numUnits },
                                                                           DataType::Float32)));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == TensorInfo({ numUnits }, DataType::Float32)));
    return workload;
}

template <typename QuantizedLstmWorkload>
std::unique_ptr<QuantizedLstmWorkload> CreateQuantizedLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    auto layer = graph.AddLayer<QuantizedLstmLayer>("quantizedLstmlayer");
    unsigned int numBatches = 2;
    unsigned int inputSize = 2;
    unsigned int outputSize = 4;

    // Scale/Offset for input/output, cellState In/Out, weights, bias
    float inputOutputScale = 0.0078125f;
    int32_t inputOutputOffset = 128;

    float cellStateScale = 0.00048828125f;
    int32_t cellStateOffset = 0;

    float weightsScale = 0.00408021f;
    int32_t weightsOffset = 100;

    float biasScale = 3.1876640625e-05f;
    int32_t biasOffset = 0;

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QAsymmU8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QAsymmU8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize},
                               armnn::DataType::Signed32,
                               biasScale,
                               biasOffset);

    // Weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    // Allocate weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_InputGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_ForgetGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_CellBias->Allocate();
    layer->m_QuantizedLstmParameters.m_OutputGateBias->Allocate();

    // Create input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(1, "cellStateIn");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(2, "outputStateIn");

    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(0, "cellStateOut");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");

    // Input/output tensor info and quantization info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmU8,
                                inputOutputScale,
                                inputOutputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, outputSize},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmU8,
                                      inputOutputScale,
                                      inputOutputOffset);

    // Connect input/output slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(cellStateIn, layer, cellStateInfo, 0, 1);
    Connect(outputStateIn, layer, outputStateInfo, 0, 2);

    Connect(layer, cellStateOut, cellStateInfo, 0, 0);
    Connect(layer, outputStateOut, outputStateInfo, 1, 0);

    CreateTensorHandles(graph, factory);

    // Create workload and check layer support
    auto workload = MakeAndCheckWorkload<QuantizedLstmWorkload>(*layer, factory);
    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    // Validate input/output sizes
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 2);

    // Validate weight tensor info
    CHECK((queueDescriptor.m_InputToInputWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToInputWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_InputGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}

template <typename QLstmWorkload>
std::unique_ptr<QLstmWorkload> CreateQLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                       armnn::Graph& graph)
{
    QLstmDescriptor layerDesc;
    layerDesc.m_CifgEnabled = true;
    layerDesc.m_PeepholeEnabled = false;
    layerDesc.m_ProjectionEnabled = false;
    layerDesc.m_LayerNormEnabled = true;

    layerDesc.m_CellClip = 0.0f;
    layerDesc.m_ProjectionClip = 0.0f;

    layerDesc.m_HiddenStateZeroPoint = 0;
    layerDesc.m_HiddenStateScale = 0.007f;

    layerDesc.m_InputIntermediateScale = 0.007059f;
    layerDesc.m_ForgetIntermediateScale = 0.007812f;
    layerDesc.m_CellIntermediateScale = 0.007059f;
    layerDesc.m_OutputIntermediateScale = 0.007812f;

    QLstmLayer* const layer = graph.AddLayer<QLstmLayer>(layerDesc, "qLstm");

    unsigned int numBatches = 2;
    unsigned int inputSize = 4;
    unsigned int numUnits = 4;
    unsigned int outputSize = 4;

    // Scale/Offset quantization info
    float inputScale = 0.0078125f;
    int32_t inputOffset = 0;

    // if (!projectionEnabled) outputScale == hiddenStateScale
    float outputScale = layerDesc.m_HiddenStateScale;
    int32_t outputOffset = layerDesc.m_HiddenStateZeroPoint;

    float cellStateScale = 3.05176e-05f;
    int32_t cellStateOffset = 0;

    float weightsScale = 0.00784314f;
    int32_t weightsOffset = 0;

    float layerNormScale = 3.05182e-05f;
    int32_t layerNormOffset = 0;

    float biasScale = layerNormScale / 1024;
    int32_t biasOffset = 0;
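    // For reference, the expression above evaluates to
    // biasScale = 3.05182e-05f / 1024 ≈ 2.98e-08.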

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QSymmS8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QSymmS8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);

    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);

    // Create and allocate tensors
    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_BasicParameters.m_RecurrentToForgetWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToCellWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();

    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_CellLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_OutputLayerNormWeights->Allocate();

    // Input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");

    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(0, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(1, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(2, "output");

    // Input/Output tensor info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmS8,
                                inputScale,
                                inputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, numUnits},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmS8,
                                      outputScale,
                                      outputOffset);

    // Connect layers to slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(outputStateIn, layer, outputStateInfo, 0, 1);
    Connect(cellStateIn, layer, cellStateInfo, 0, 2);

    Connect(layer, outputStateOut, outputStateInfo, 0, 0);
    Connect(layer, cellStateOut, cellStateInfo, 1, 0);
    Connect(layer, output, outputStateInfo, 2, 0);

    CreateTensorHandles(graph, factory);

    // Create and check workload
    auto workload = MakeAndCheckWorkload<QLstmWorkload>(*layer, factory);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_CellClip == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ProjectionClip == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 3);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                             armnn::Graph& graph)
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 1;
    layerDesc.m_PadRight = 1;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = true;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({ 2, 3, 3, 3 }, DataType, inputsQScale));
    layer->m_Bias = std::make_unique<ScopedTensorHandle>
        (TensorInfo({2}, GetBiasDataType(DataType), inputsQScale));
    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo({2, 3, 6, 6}, DataType, inputsQScale));
    Connect(layer, output, TensorInfo({2, 2, 6, 6}, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);

    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 3, 3},
                                                                   DataType, inputsQScale)));
    CHECK((queueDescriptor.m_Bias->GetTensorInfo()
           == TensorInfo({2}, GetBiasDataType(DataType), inputsQScale)));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename DepthwiseConvolution2dFloat32Workload, armnn::DataType DataType>
std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    // Creates the layer we're testing.
    DepthwiseConvolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 1;
    layerDesc.m_PadRight = 2;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 2;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_DataLayout = dataLayout;

    DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");

    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType)); // [ 1, H, W, I*M ]
    layer->m_Weight->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ?
                              TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                              TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType));
    Connect(layer, output, TensorInfo(outputShape, DataType));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, factory);

    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 2);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == false);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType)));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename FullyConnectedWorkload, armnn::DataType DataType>
std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                         armnn::Graph& graph)
{
    // Creates the layer we're testing.
    FullyConnectedDescriptor layerDesc;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_TransposeWeightMatrix = true;

    FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    // As optimization isn't run, member variables need to be updated.
    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0));
    layer->m_Weight->Allocate();

    armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);

    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename FullyConnectedWorkload, armnn::DataType DataType>
std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
    (armnn::IWorkloadFactory& factory,
     armnn::Graph& graph)
{
    // Creates the layer we're testing.
    FullyConnectedDescriptor layerDesc;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_TransposeWeightMatrix = true;

    FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");

    float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0;

    // As optimization isn't run, member variables need to be updated.
    layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0));
    layer->m_Bias = std::make_unique<ScopedTensorHandle>(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale));
    layer->m_Weight->Allocate();
    layer->m_Bias->Allocate();

    armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
    armnn::TensorInfo biasesTensorInfo({7}, GetBiasDataType(DataType), inputsQScale);
    weightsTensorInfo.SetConstant();
    biasesTensorInfo.SetConstant();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A = 10.0f;
    activationDesc->m_B = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(static_cast<ActivationFunction>(activationDescPtr->m_Function) ==
                 armnn::ActivationFunction::BoundedReLu);

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const biases = graph.AddLayer<ConstantLayer>("biases");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();
    biases->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasesTensorInfo);
    biases->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(biases, layer, biasesTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);

    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();

    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);

    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
    CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

1319 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1320 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest
1321  (armnn::IWorkloadFactory& factory,
1322  armnn::Graph& graph)
1323 {
1324  // Creates the layer we're testing.
1325  FullyConnectedDescriptor layerDesc;
1326  layerDesc.m_BiasEnabled = true;
1327  layerDesc.m_TransposeWeightMatrix = true;
1328  layerDesc.m_ConstantWeights = false;
1329 
1330  FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1331 
 1332  float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0f;
 1333  float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0f;
1334 
1335  // Creates extra layers with weights and biases as input layers.
1336  Layer* const input = graph.AddLayer<InputLayer>(1, "input");
1337  Layer* const weights = graph.AddLayer<InputLayer>(2, "weights");
1338  Layer* const biases = graph.AddLayer<InputLayer>(3, "biases");
1339  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1340 
1341  // Connects up.
1342  Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1343  Connect(weights, layer, TensorInfo({7, 20}, DataType, inputsQScale), 0, 1);
1344  Connect(biases, layer, TensorInfo({7}, GetBiasDataType(DataType), inputsQScale), 0, 2);
1345  Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1346  CreateTensorHandles(graph, factory);
1347 
1348  // Makes the workload and checks it.
1349  auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1350 
1351  FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1352 
1353  CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1354  CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1355  CHECK(queueDescriptor.m_Parameters.m_ConstantWeights == false);
1356  CHECK(queueDescriptor.m_Inputs.size() == 3);
1357  CHECK(queueDescriptor.m_Outputs.size() == 1);
1358 
1359  // Returns so we can do extra, backend-specific tests.
1360  return workload;
1361 }
1362 
1363 
1364 template <typename NormalizationWorkload, armnn::DataType DataType>
1365 std::unique_ptr<NormalizationWorkload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1366  armnn::Graph& graph,
1367  DataLayout dataLayout = DataLayout::NCHW)
1368 {
1369  // Creates the layer we're testing.
 1370  NormalizationDescriptor layerDesc;
 1371  layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across;
 1372  layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
 1373  layerDesc.m_NormSize = 3;
1374  layerDesc.m_Alpha = 0.5f;
1375  layerDesc.m_Beta = -1.0f;
1376  layerDesc.m_K = 0.2f;
1377  layerDesc.m_DataLayout = dataLayout;
1378 
1379  NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
1380 
1381  // Creates extra layers.
1382  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1383  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1384 
1385  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1386  TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1387  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1388  TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1389 
1390  // Connects up.
1391  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1392  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1393  Connect(input, layer, inputTensorInfo);
1394  Connect(layer, output, outputTensorInfo);
1395  CreateTensorHandles(graph, factory);
1396 
1397  // Makes the workload and checks it.
1398  auto workload = MakeAndCheckWorkload<NormalizationWorkload>(*layer, factory);
1399 
1400  NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1401  CHECK((queueDescriptor.m_Parameters.m_NormChannelType == NormalizationAlgorithmChannel::Across));
1402  CHECK((queueDescriptor.m_Parameters.m_NormMethodType == NormalizationAlgorithmMethod::LocalBrightness));
1403  CHECK(queueDescriptor.m_Parameters.m_NormSize == 3);
1404  CHECK(queueDescriptor.m_Parameters.m_Alpha == 0.5f);
1405  CHECK(queueDescriptor.m_Parameters.m_Beta == -1.0f);
1406  CHECK(queueDescriptor.m_Parameters.m_K == 0.2f);
1407  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1408 
1409  CHECK(queueDescriptor.m_Inputs.size() == 1);
1410  CHECK(queueDescriptor.m_Outputs.size() == 1);
1411 
1412  // Returns so we can do extra, backend-specific tests.
1413  return workload;
1414 }
1415 
1416 template <typename Pooling2dWorkload, armnn::DataType DataType>
1417 std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory,
1418  armnn::Graph& graph,
1419  DataLayout dataLayout = DataLayout::NCHW)
1420 {
1421  // Creates the layer we're testing.
 1422  Pooling2dDescriptor layerDesc;
 1423  layerDesc.m_PoolType = PoolingAlgorithm::Average;
 1424  layerDesc.m_PoolWidth = 3;
1425  layerDesc.m_PoolHeight = 3;
1426  layerDesc.m_PadLeft = 2;
1427  layerDesc.m_PadRight = 2;
1428  layerDesc.m_PadTop = 1;
1429  layerDesc.m_PadBottom = 1;
1430  layerDesc.m_StrideX = 2;
 1431  layerDesc.m_StrideY = 3;
 1432  layerDesc.m_OutputShapeRounding = OutputShapeRounding::Floor;
 1433  layerDesc.m_DataLayout = dataLayout;
1434 
1435  Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer");
1436 
1437  // Create extra layers
1438  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1439  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1440 
1441  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
1442  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
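 // For reference, the spatial output size under Floor rounding:
 //   outHeight = floor((5 + 1 + 1 - 3) / 3) + 1 = 2
 //   outWidth  = floor((5 + 2 + 2 - 3) / 2) + 1 = 4
 // which matches the {3, 2, 2, 4} (NCHW) / {3, 2, 4, 2} (NHWC) shapes above.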
1443 
1444  // Connect up
1445  Connect(input, layer, TensorInfo(inputShape, DataType));
1446  Connect(layer, output, TensorInfo(outputShape, DataType));
1447  CreateTensorHandles(graph, factory);
1448 
1449  // Make the workload and checks it
1450  auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, factory);
1451 
1452  Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
1453  CHECK((queueDescriptor.m_Parameters.m_PoolType == PoolingAlgorithm::Average));
1454  CHECK((queueDescriptor.m_Parameters.m_OutputShapeRounding == OutputShapeRounding::Floor));
1455  CHECK(queueDescriptor.m_Parameters.m_PoolWidth == 3);
1456  CHECK(queueDescriptor.m_Parameters.m_PoolHeight == 3);
1457  CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
1458  CHECK(queueDescriptor.m_Parameters.m_StrideY == 3);
1459  CHECK(queueDescriptor.m_Parameters.m_PadLeft == 2);
1460  CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
1461  CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
1462  CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
1463  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1464 
1465  CHECK(queueDescriptor.m_Inputs.size() == 1);
1466  CHECK(queueDescriptor.m_Outputs.size() == 1);
1467 
1468  // Return so we can do extra, backend-specific tests
1469  return workload;
1470 }
1471 
1472 template <typename SoftmaxWorkload, armnn::DataType DataType>
1473 std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1474  armnn::Graph& graph)
1475 {
1476  // Create the layer we're testing.
1477  SoftmaxDescriptor softmaxDescriptor;
 1478  // Set Axis to -1 if CL or Neon until further Axes are supported.
 1479  if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
 1480  {
1481  softmaxDescriptor.m_Axis = -1;
1482  }
1483 
1484  Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer");
1485  // Create extra layers.
1486  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1487  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1488 
1489  // Connect up
1490  armnn::TensorInfo tensorInfo({4, 1}, DataType);
1491  if (DataType == armnn::DataType::QAsymmU8)
1492  {
1493  tensorInfo.SetQuantizationOffset(0);
1494  tensorInfo.SetQuantizationScale(1.f / 256);
1495  }
1496  else if (DataType == armnn::DataType::QAsymmS8)
1497  {
1498  tensorInfo.SetQuantizationOffset(-128);
1499  tensorInfo.SetQuantizationScale(1.f / 256);
1500  }
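 // For reference: softmax outputs lie in [0, 1], so a scale of 1/256 spreads that
 // range over all 256 quantization levels, and the offset (0 for QAsymmU8, -128
 // for QAsymmS8) pins real zero to the bottom of each type's range.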
1501 
1502  Connect(input, layer, tensorInfo);
1503  Connect(layer, output, tensorInfo);
1504  CreateTensorHandles(graph, factory);
1505 
1506  // Make the workload and checks it.
1507  auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, factory);
1508 
1509  SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1510  CHECK(queueDescriptor.m_Inputs.size() == 1);
1511  CHECK(queueDescriptor.m_Outputs.size() == 1);
1512 
1513  // Return so we can do extra, backend-specific tests.
1514  return workload;
1515 }
1516 
1517 template<typename SplitterWorkload, armnn::DataType DataType>
1518 std::unique_ptr<SplitterWorkload>
1519  CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1520 {
1521  // Create the layer we're testing.
 1522  // NOTE: we need three dimensions (channels, height/y, width/x) because the Compute
 1523  // Library restricts subtensors to have the same x and y dimensions as
 1524  // their parent tensors, and therefore the origin on the x and y dimensions
 1525  // has to be zero for any view. So we need a third dimension to split along.
1526  // NOTE: arguments are: number of views, number of dimensions.
1527  ViewsDescriptor layerDesc(3, 3);
1528  // NOTE: arguments are: view, dimension, value.
1529  layerDesc.SetViewOriginCoord(0, 0, 0);
1530  layerDesc.SetViewOriginCoord(1, 0, 1);
1531  layerDesc.SetViewOriginCoord(2, 0, 3);
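 // For reference, the resulting views over the {5, 7, 7} input along dimension 0:
 //   view 0 starts at channel 0 -> output0 {1, 7, 7}
 //   view 1 starts at channel 1 -> output1 {2, 7, 7}
 //   view 2 starts at channel 3 -> output2 {2, 7, 7}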
1532 
1533  Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer");
1534 
1535  // Adds extra layers.
1536  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1537  Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0");
1538  Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1539  Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1540 
1541  // Connects up.
1542  armnn::TensorInfo tensorInfo({5, 7, 7}, DataType);
1543  Connect(input, layer, tensorInfo);
1544 
1545  armnn::TensorInfo output0Info({1, 7, 7}, DataType);
1546  armnn::TensorInfo output1Info({2, 7, 7}, DataType);
1547  armnn::TensorInfo output2Info({2, 7, 7}, DataType);
1548 
1549  Connect(layer, output0, output0Info, 0, 0);
1550  Connect(layer, output1, output1Info, 1, 0);
1551  Connect(layer, output2, output2Info, 2, 0);
1552 
1553  CreateTensorHandles(graph, factory);
1554 
1555  // Makes the workload and checks it.
1556  auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, factory);
1557 
1558  SplitterQueueDescriptor queueDescriptor = workload->GetData();
1559  CHECK(queueDescriptor.m_Inputs.size() == 1);
1560  CHECK(queueDescriptor.m_Outputs.size() == 3);
1561  CHECK(queueDescriptor.m_ViewOrigins.size() == 3);
1562 
1563  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[0] == 0);
1564  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[0] == 1);
1565  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[0] == 3);
1566  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[1] == 0);
1567  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[1] == 0);
1568  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[1] == 0);
1569  CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[2] == 0);
1570  CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[2] == 0);
1571  CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[2] == 0);
1572 
1573  // Returns so we can do extra, backend-specific tests.
1574  return workload;
1575 }
1576 
1577 /// This function constructs a graph with both a splitter and a concat, and returns a pair of the workloads.
1578 template<typename SplitterWorkload, typename ConcatWorkload, armnn::DataType DataType>
1579 std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<ConcatWorkload>>
1580  CreateSplitterConcatWorkloadTest(armnn::IWorkloadFactory &factory, armnn::Graph &graph)
1581 {
1582  armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, DataType);
1583 
1584  armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, DataType);
1585  armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, DataType);
1586 
1587  //Constructs the graph.
1588  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1589 
1590  armnn::ViewsDescriptor splitterViews(2);
1591  splitterViews.SetViewOriginCoord(0, 0, 0);
1592  splitterViews.SetViewOriginCoord(0, 1, 0);
1593  splitterViews.SetViewOriginCoord(0, 2, 0);
1594  splitterViews.SetViewOriginCoord(0, 3, 0);
1595 
1596  splitterViews.SetViewOriginCoord(1, 0, 0);
1597  splitterViews.SetViewOriginCoord(1, 1, 1);
1598  splitterViews.SetViewOriginCoord(1, 2, 0);
1599  splitterViews.SetViewOriginCoord(1, 3, 0);
1600 
1601  // create splitter layer
1602  Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1603  CHECK(splitter);
1604 
1605  armnn::OriginsDescriptor concatViews(2);
1606  concatViews.SetViewOriginCoord(0, 0, 0);
1607  concatViews.SetViewOriginCoord(0, 1, 1);
1608  concatViews.SetViewOriginCoord(0, 2, 0);
1609  concatViews.SetViewOriginCoord(0, 3, 0);
1610 
1611  concatViews.SetViewOriginCoord(1, 0, 0);
1612  concatViews.SetViewOriginCoord(1, 1, 0);
1613  concatViews.SetViewOriginCoord(1, 2, 0);
1614  concatViews.SetViewOriginCoord(1, 3, 0);
1615 
1616  // create concat layer
1617  Layer* const concat = graph.AddLayer<ConcatLayer>(concatViews, "concat");
1618  CHECK(concat);
1619 
1620  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1621 
1622  // Adds connections.
1623  // connect input to splitter
1624  Connect(input, splitter, inputTensorInfo, 0, 0);
1625  // connect splitter[0] to concat[1]
1626  Connect(splitter, concat, splitTensorInfo1, 0, 1); // The splitter & concat are connected up.
1627  // connect splitter[1] to concat[0]
1628  Connect(splitter, concat, splitTensorInfo2, 1, 0); // So that the outputs are flipped round.
1629  // connect concat to output
1630  Connect(concat, output, inputTensorInfo, 0, 0);
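 // For reference: splitter output 0 (channel 0) feeds concat input 1, whose view
 // origin is channel 0, while splitter output 1 (channel 1) feeds concat input 0,
 // whose view origin is channel 1. The crossed connections and the reversed concat
 // origins therefore cancel out, leaving the channels in their original order.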
1631 
 1632  // create tensor handles
1633  CreateTensorHandles(graph, factory);
1634 
 1635  // create splitter workload
1636  auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1637  CHECK(workloadSplitter);
 1638  // create concat workload
1639  auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
1640  CHECK(workloadConcat);
1641 
1642  return {std::move(workloadSplitter), std::move(workloadConcat)};
1643 }
1644 
1645 
1646 /// This function constructs a graph with a splitter with two outputs. Each of the outputs is then
 1647 /// connected to two different activation layers.
1648 template<typename SplitterWorkload, typename ActivationWorkload, armnn::DataType DataType>
1649 void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph,
1650  std::unique_ptr<SplitterWorkload>& wlSplitter,
1651  std::unique_ptr<ActivationWorkload>& wlActiv0_0,
1652  std::unique_ptr<ActivationWorkload>& wlActiv0_1,
1653  std::unique_ptr<ActivationWorkload>& wlActiv1_0,
1654  std::unique_ptr<ActivationWorkload>& wlActiv1_1)
1655 {
1656  armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, DataType);
1657  armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, DataType);
1658  armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, DataType);
1659 
1660  //Constructs the graph.
1661  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1662 
1663  armnn::ViewsDescriptor splitterViews(2);
1664 
1665  splitterViews.SetViewOriginCoord(0, 0, 0);
1666  splitterViews.SetViewOriginCoord(0, 1, 0);
1667  splitterViews.SetViewOriginCoord(0, 2, 0);
1668  splitterViews.SetViewOriginCoord(0, 3, 0);
1669 
1670  splitterViews.SetViewOriginCoord(1, 0, 0);
1671  splitterViews.SetViewOriginCoord(1, 1, 1);
1672  splitterViews.SetViewOriginCoord(1, 2, 0);
1673  splitterViews.SetViewOriginCoord(1, 3, 0);
1674 
1675  Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1676 
1677  armnn::ActivationDescriptor activationDesc;
1678 
1679  Layer* const activ0_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_0");
1680  Layer* const activ0_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_1");
1681  Layer* const activ1_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_0");
1682  Layer* const activ1_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_1");
1683 
1684  Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1685  Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1686  Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3");
1687  Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4");
1688 
1689  // Adds connections.
1690  Connect(input, splitter, inputTensorInfo, 0, 0);
1691  Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
1692  Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
1693 
1694  Connect(splitter, activ1_0, splitTensorInfo2, 1, 0);
1695  Connect(splitter, activ1_1, splitTensorInfo2, 1, 0);
1696 
1697  Connect(activ0_0, output1, splitTensorInfo1, 0, 0);
1698  Connect(activ0_1, output2, splitTensorInfo1, 0, 0);
1699  Connect(activ1_0, output3, splitTensorInfo2, 0, 0);
1700  Connect(activ1_1, output4, splitTensorInfo2, 0, 0);
1701 
1702  CreateTensorHandles(graph, factory);
1703 
1704  auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1705  auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, factory);
1706  auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, factory);
1707  auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, factory);
1708  auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, factory);
1709 
1710  wlSplitter = std::move(workloadSplitter);
1711  wlActiv0_0 = std::move(workloadActiv0_0);
1712  wlActiv0_1 = std::move(workloadActiv0_1);
1713  wlActiv1_0 = std::move(workloadActiv1_0);
1714  wlActiv1_1 = std::move(workloadActiv1_1);
1715 }
1716 
1717 template <typename ResizeWorkload, armnn::DataType DataType>
1718 std::unique_ptr<ResizeWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory,
1719  armnn::Graph& graph,
1720  DataLayout dataLayout = DataLayout::NCHW)
1721 {
1722  TensorShape inputShape;
1723  TensorShape outputShape;
1724 
1725  switch (dataLayout) {
1726  case DataLayout::NHWC:
1727  inputShape = { 2, 4, 4, 3 };
1728  outputShape = { 2, 2, 2, 3 };
1729  break;
1730  case DataLayout::NCHW:
1731  default:
1732  inputShape = { 2, 3, 4, 4 };
1733  outputShape = { 2, 3, 2, 2 };
1734  }
1735 
1736  // Creates the layer we're testing.
1737  ResizeDescriptor resizeDesc;
1738  armnnUtils::DataLayoutIndexed dimensionIndices = dataLayout;
1739  resizeDesc.m_Method = ResizeMethod::Bilinear;
1740  resizeDesc.m_TargetWidth = outputShape[dimensionIndices.GetWidthIndex()];
1741  resizeDesc.m_TargetHeight = outputShape[dimensionIndices.GetHeightIndex()];
1742  resizeDesc.m_DataLayout = dataLayout;
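 // For reference: DataLayoutIndexed resolves H/W to indices 2/3 for NCHW and 1/2
 // for NHWC, so m_TargetWidth/m_TargetHeight are read from the correct dimensions
 // of outputShape for either layout.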
1743  Layer* const layer = graph.AddLayer<ResizeLayer>(resizeDesc, "resize");
1744 
1745  // Creates extra layers.
1746  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1747  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1748 
1749  // Connects up.
1750  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1751  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1752  Connect(input, layer, inputTensorInfo);
1753  Connect(layer, output, outputTensorInfo);
1754  CreateTensorHandles(graph, factory);
1755 
1756  // Makes the workload and checks it.
1757  auto workload = MakeAndCheckWorkload<ResizeWorkload>(*layer, factory);
1758 
1759  auto queueDescriptor = workload->GetData();
1760  CHECK(queueDescriptor.m_Inputs.size() == 1);
1761  CHECK(queueDescriptor.m_Outputs.size() == 1);
1762  CHECK(queueDescriptor.m_Parameters.m_DataLayout == dataLayout);
1763 
1764  // Returns so we can do extra, backend-specific tests.
1765  return workload;
1766 }
1767 
1768 template <typename BatchToSpaceNdWorkload, armnn::DataType DataType>
1769 std::unique_ptr<BatchToSpaceNdWorkload> CreateBatchToSpaceNdWorkloadTest(armnn::IWorkloadFactory& factory,
1770  armnn::Graph& graph)
1771 {
 1772  BatchToSpaceNdDescriptor desc;
 1773  Layer* const layer = graph.AddLayer<BatchToSpaceNdLayer>(desc, "batchToSpace");
1774 
1775  // Creates extra layers.
1776  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1777  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1778 
1779  // Connects up.
1780  armnn::TensorInfo tensorInfo({1, 1, 1, 1}, DataType);
1781 
1782  Connect(input, layer, tensorInfo);
1783  Connect(layer, output, tensorInfo);
1784 
1785  CreateTensorHandles(graph, factory);
1786 
1787  // Makes the workload and checks it.
1788  auto workload = MakeAndCheckWorkload<BatchToSpaceNdWorkload>(*layer, factory);
1789 
1790  BatchToSpaceNdQueueDescriptor queueDescriptor = workload->GetData();
1791  CHECK(queueDescriptor.m_Inputs.size() == 1);
1792  CHECK(queueDescriptor.m_Outputs.size() == 1);
1793 
1794  return workload;
1795 }
1796 
1797 template <typename LogSoftmaxWorkload, armnn::DataType DataType>
1798 std::unique_ptr<LogSoftmaxWorkload> CreateLogSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1799  armnn::Graph& graph)
1800 {
1801  // Create the layer we're testing.
1802  LogSoftmaxDescriptor logSoftmaxDescriptor;
 1803  // Set Axis to -1 if CL or Neon until further Axes are supported.
 1804  if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
 1805  {
1806  logSoftmaxDescriptor.m_Axis = -1;
1807  }
1808 
1809  Layer* const layer = graph.AddLayer<LogSoftmaxLayer>(logSoftmaxDescriptor, "layer");
1810  // Create extra layers.
1811  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1812  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1813 
1814  // Connect up
1815  armnn::TensorInfo tensorInfo({4, 1}, DataType);
1816 
1817  Connect(input, layer, tensorInfo);
1818  Connect(layer, output, tensorInfo);
1819  CreateTensorHandles(graph, factory);
1820 
1821  // Make the workload and checks it.
1822  auto workload = MakeAndCheckWorkload<LogSoftmaxWorkload>(*layer, factory);
1823 
1824  LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1825  CHECK(queueDescriptor.m_Inputs.size() == 1);
1826  CHECK(queueDescriptor.m_Outputs.size() == 1);
1827 
1828  // Return so we can do extra, backend-specific tests.
1829  return workload;
1830 }
1831 
1832 template <typename L2NormalizationWorkload, armnn::DataType DataType>
1833 std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1834  armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
1835 {
1836  // Creates the layer we're testing.
1837  L2NormalizationDescriptor layerDesc;
1838  layerDesc.m_DataLayout = dataLayout;
1839 
1840  Layer* const layer = graph.AddLayer<L2NormalizationLayer>(layerDesc, "l2norm");
1841 
1842  // Creates extra layers.
1843  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1844  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1845 
1846  TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1847  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1848  TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1849  TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1850 
1851  // Connects up.
1852  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1853  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1854  Connect(input, layer, inputTensorInfo);
1855  Connect(layer, output, outputTensorInfo);
1856  CreateTensorHandles(graph, factory);
1857 
1858  // Makes the workload and checks it.
1859  auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, factory);
1860 
1861  L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1862  CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1863  CHECK(queueDescriptor.m_Inputs.size() == 1);
1864  CHECK(queueDescriptor.m_Outputs.size() == 1);
1865 
1866  // Returns so we can do extra, backend-specific tests.
1867  return workload;
1868 }
1869 
1870 template <typename ReshapeWorkload, armnn::DataType DataType>
1871 std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory,
1872  armnn::Graph& graph)
1873 {
1874  // Creates the layer we're testing.
1875  TensorShape outputShape({ 1, 4 });
1876  ReshapeDescriptor reshapeDesc;
1877  reshapeDesc.m_TargetShape = outputShape;
1878  Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer");
1879 
1880  // Creates extra layers.
1881  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1882  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1883 
1884  // Connects up.
1885  armnn::TensorInfo inputTensorInfo({ 4, 1 }, DataType);
1886  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1887  Connect(input, layer, inputTensorInfo);
1888  Connect(layer, output, outputTensorInfo);
1889  CreateTensorHandles(graph, factory);
1890 
1891  // Makes the workload and checks it.
1892  auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, factory);
1893 
1894  ReshapeQueueDescriptor queueDescriptor = workload->GetData();
1895  CHECK(queueDescriptor.m_Inputs.size() == 1);
1896  CHECK(queueDescriptor.m_Outputs.size() == 1);
1897 
1898  // Returns so we can do extra, backend-specific tests.
1899  return workload;
1900 }
1901 
1902 template <typename ConvertFp16ToFp32Float32Workload>
1903 std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest(
1904  armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1905 {
1906  // Creates the layer we're testing.
1907  ConvertFp16ToFp32Layer* const layer = graph.AddLayer<ConvertFp16ToFp32Layer>("Fp16ToFp32Converter");
1908 
1909  // Creates extra layers.
1910  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1911  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1912 
1913  // Connects up.
1914  armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1915  armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1916  Connect(input, layer, inputTensorInfo);
1917  Connect(layer, output, outputTensorInfo);
1918  CreateTensorHandles(graph, factory);
1919 
1920  // Makes the workload and checks it.
1921  auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, factory);
1922 
1923  ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
1924  CHECK(queueDescriptor.m_Inputs.size() == 1);
1925  CHECK(queueDescriptor.m_Outputs.size() == 1);
1926 
1927  // Returns so we can do extra, backend-specific tests.
1928  return workload;
1929 }
1930 
1931 template <typename ConvertFp32ToFp16Float16Workload>
1932 std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest(
1933  armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1934 {
1935  // Creates the layer we're testing.
1936  ConvertFp32ToFp16Layer* const layer = graph.AddLayer<ConvertFp32ToFp16Layer>("Fp32ToFp16Converter");
1937 
1938  // Creates extra layers.
1939  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1940  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1941 
1942  // Connects up.
1943  armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1944  armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1945  Connect(input, layer, inputTensorInfo);
1946  Connect(layer, output, outputTensorInfo);
1947  CreateTensorHandles(graph, factory);
1948 
1949  // Makes the workload and checks it.
1950  auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, factory);
1951 
1952  ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
1953  CHECK(queueDescriptor.m_Inputs.size() == 1);
1954  CHECK(queueDescriptor.m_Outputs.size() == 1);
1955 
1956  // Returns so we can do extra, backend-specific tests.
1957  return workload;
1958 }
1959 
1960 template <typename MeanWorkload, armnn::DataType DataType>
1961 std::unique_ptr<MeanWorkload> CreateMeanWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1962 {
1963  // Reduce along the first and second dimensions, and do not keep the reduced dimensions.
1964  MeanDescriptor descriptor({ 1, 2 }, false);
1965 
1966  // Creates the layer we're testing.
1967  Layer* const layer = graph.AddLayer<MeanLayer>(descriptor, "mean");
1968 
1969  // Creates extra layers.
1970  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1971  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1972 
1973  // Connects up.
1974  armnn::TensorInfo inputTensorInfo({ 1, 3, 7, 4 }, DataType);
1975  armnn::TensorInfo outputTensorInfo({ 1, 4 }, DataType);
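 // For reference: reducing axes 1 and 2 of the {1, 3, 7, 4} input with m_KeepDims
 // set to false collapses those dimensions, giving the {1, 4} output.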
1976  Connect(input, layer, inputTensorInfo);
1977  Connect(layer, output, outputTensorInfo);
1978  CreateTensorHandles(graph, factory);
1979 
1980  // Makes the workload and checks it.
1981  auto workload = MakeAndCheckWorkload<MeanWorkload>(*layer, factory);
1982 
1983  MeanQueueDescriptor queueDescriptor = workload->GetData();
1984  CHECK(queueDescriptor.m_Parameters.m_Axis == descriptor.m_Axis);
1985  CHECK(queueDescriptor.m_Parameters.m_KeepDims == descriptor.m_KeepDims);
1986  CHECK(queueDescriptor.m_Inputs.size() == 1);
1987  CHECK(queueDescriptor.m_Outputs.size() == 1);
1988 
1989  // Returns so we can do extra, backend-specific tests.
1990  return workload;
1991 }
1992 
1993 template<typename ConcatWorkload, armnn::DataType DataType>
1994 std::unique_ptr<ConcatWorkload> CreateConcatWorkloadTest(armnn::IWorkloadFactory &factory,
1995  armnn::Graph &graph,
1996  const armnn::TensorShape &outputShape,
1997  unsigned int concatAxis)
1998 {
1999  armnn::TensorInfo inputTensorInfo({ 2, 3, 2, 5 }, DataType);
2000  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2001 
2002  // Constructs the graph.
2003  Layer* const input0 = graph.AddLayer<InputLayer>(0, "input0");
2004  Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
2005  armnn::OriginsDescriptor descriptor;
2006 
2007  std::vector<armnn::TensorShape> inputShapes{{ 2, 3, 2, 5 }, { 2, 3, 2, 5 }};
2008 
2009  descriptor = CreateDescriptorForConcatenation(inputShapes.begin(),
2010  inputShapes.end(),
2011  concatAxis);
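 // For reference: CreateDescriptorForConcatenation derives each view's origin by
 // accumulating the input extents along concatAxis, so the second {2, 3, 2, 5}
 // input is offset by the first input's size on that axis.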
2012 
2013  // create concat layer
2014  Layer* const concat = graph.AddLayer<ConcatLayer>(descriptor, "concat");
2015  CHECK(concat);
2016 
2017  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2018 
2019  // Adds connections.
2020  // connect input0 to concat
2021  Connect(input0, concat, inputTensorInfo, 0, 0);
2022  // connect input1 to concat
2023  Connect(input1, concat, inputTensorInfo, 0, 1);
2024  // connect concat to output
2025  Connect(concat, output, outputTensorInfo, 0, 0);
2026 
2027  // create tensor handles
2028  CreateTensorHandles(graph, factory);
2029 
2030  // create concat workload
2031  auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
2032  CHECK(workloadConcat);
2033 
2034  return workloadConcat;
2035 }
2036 
2037 template <typename PreCompiledWorkload, armnn::DataType dataType>
2038 std::pair<armnn::IOptimizedNetworkPtr, std::unique_ptr<PreCompiledWorkload>> CreatePreCompiledWorkloadTest(
2039  armnn::IWorkloadFactory& factory,
2040  armnn::Graph& graph,
2041  bool biasEnabled = false)
2042 {
2043  IgnoreUnused(graph);
2044 
 2045  // build up the structure of the network
 2046  armnn::INetworkPtr net(armnn::INetwork::Create());
 2047 
2048  // Add an input layer
2049  armnn::IConnectableLayer* const inputLayer = net->AddInputLayer(0, "input layer");
2050  CHECK(inputLayer);
2051 
2052  // ArmNN weights tensor shape is OIHW (out channels, in channels, height, width) for NCHW
2053  // ArmNN weights tensor shape is OHWI (out channels, height, width, in channels) for NHWC
2054  // this test is using NHWC, so the weights shape is OHWI
2055  TensorInfo weightsTensorInfo(TensorShape({16, 1, 1, 16}), dataType, 0.9f, 0);
2056  unsigned int weightsLength = weightsTensorInfo.GetNumElements();
2057 
2058  using WeightType = armnn::ResolveType<dataType>;
2059  std::vector<WeightType> convWeightsData(weightsLength);
2060  for (unsigned int i = 0; i < weightsLength; ++i)
2061  {
2062  convWeightsData[i] = static_cast<WeightType>(i);
2063  }
2064 
2065  armnn::ConstTensor weights(weightsTensorInfo, convWeightsData);
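 // For reference: in OHWI terms, {16, 1, 1, 16} describes a 1x1 convolution with
 // 16 input channels and 16 output channels.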
2066 
2067  // Add a layer that can be used in the PreCompiled layer
2068  armnn::Convolution2dDescriptor convDesc2d;
2069  convDesc2d.m_StrideX = 1;
2070  convDesc2d.m_StrideY = 1;
 2071  convDesc2d.m_BiasEnabled = biasEnabled;
 2072  convDesc2d.m_DataLayout = armnn::DataLayout::NHWC;
 2073 
2074  armnn::IConnectableLayer* convLayer = nullptr;
2075  const std::string convLayerName("conv layer");
2076 
2077  if (biasEnabled)
2078  {
2079  constexpr armnn::DataType biasDataType = ( dataType == armnn::DataType::QAsymmU8) ?
2080  armnn::DataType::Signed32 : armnn::DataType::Float32;
2081 
2082  TensorInfo biasTensorInfo(TensorShape({16}), biasDataType, 0.9f * 0.9f, 0);
2083  unsigned int biasLength = biasTensorInfo.GetNumElements();
2084 
2085  using BiasType = armnn::ResolveType<biasDataType>;
2086  std::vector<BiasType> biasData(biasLength);
2087  std::fill(biasData.begin(), biasData.end(), static_cast<BiasType>(0));
2088 
2089  armnn::ConstTensor biases(biasTensorInfo, biasData);
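 // For reference: for quantized convolutions the bias scale is conventionally
 // inputScale * weightScale, hence 0.9f * 0.9f here, with Signed32 storage
 // when the data type is QAsymmU8.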
2090 
2091  // Create convolution layer with biases
2092  convLayer = net->AddConvolution2dLayer(convDesc2d,
2093  weights,
2094  Optional<ConstTensor>(biases),
2095  convLayerName.c_str());
2096  }
2097  else
2098  {
2099  // Create convolution layer without biases
2100  convLayer = net->AddConvolution2dLayer(convDesc2d,
2101  weights,
2102  EmptyOptional(),
2103  convLayerName.c_str());
2104  }
2105 
2106  CHECK(convLayer);
2107 
2108  // Add an output layer
2109  armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output layer");
2110  CHECK(outputLayer);
2111 
2112  // set the tensors in the network (NHWC format)
2113  TensorInfo inputTensorInfo(TensorShape({ 1, 16, 16, 16 }), dataType);
2114  if (dataType == armnn::DataType::QAsymmU8)
2115  {
2116  inputTensorInfo.SetQuantizationOffset(0);
2117  inputTensorInfo.SetQuantizationScale(0.9f);
2118  }
2119 
2120  TensorInfo outputTensorInfo(TensorShape({1, 16, 16, 16}), dataType);
2121  if (dataType == armnn::DataType::QAsymmU8)
2122  {
2123  outputTensorInfo.SetQuantizationOffset(0);
2124  outputTensorInfo.SetQuantizationScale(0.9f);
2125  }
2126 
2127  // Connect the layers
2128  inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
2129  inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
2130 
2131  convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
2132  convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
2133 
2134  // Optimize the network for the backend supported by the factory
2135  std::vector<armnn::BackendId> backends = {factory.GetBackendId()};
 2136  armnn::IRuntime::CreationOptions options;
 2137  armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
2138  armnn::OptimizerOptions optimizerOptions;
2139  armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec(),
2140  optimizerOptions);
2141  CHECK(optimizedNet != nullptr);
2142 
2143  // Find the PreCompiled layer in the optimised graph
2144  armnn::Graph& optimisedGraph = GetGraphForTesting(optimizedNet.get());
2145  Layer* preCompiledLayer = nullptr;
2146  for (auto& layer : optimisedGraph)
2147  {
2148  if (layer->GetType() == LayerType::PreCompiled)
2149  {
2150  preCompiledLayer = layer;
2151  }
2152  }
2153  CHECK(preCompiledLayer != nullptr);
2154 
2155  // Create the TensorHandles.
2156  CreateTensorHandles(optimisedGraph, factory);
2157 
2158  // Make the workload and check it.
2159  auto workload = MakeAndCheckWorkload<PreCompiledWorkload>(*preCompiledLayer, factory);
2160 
2161  PreCompiledQueueDescriptor queueDescriptor = workload->GetData();
2162  CHECK(queueDescriptor.m_Inputs.size() == 1);
2163  CHECK(queueDescriptor.m_Outputs.size() == 1);
2164 
2165  // Returns the workload so we can do extra, backend-specific tests.
 2166  // NOTE: We need to return the optimised network as well, otherwise it goes
 2167  // out of scope and the tensor handles are destroyed.
2168  return std::make_pair(std::move(optimizedNet), std::move(workload));
2169 }
2170 
2171 template<typename ConstantWorkload, armnn::DataType DataType>
2172 std::unique_ptr<ConstantWorkload> CreateConstantWorkloadTest(armnn::IWorkloadFactory& factory,
2173  armnn::Graph& graph,
2174  const armnn::TensorShape& outputShape)
2175 {
2176  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2177 
2178  // create constant layer
2179  auto constant = graph.AddLayer<ConstantLayer>("constant");
2180  CHECK(constant);
2181  constant->m_LayerOutput = std::make_unique<ScopedTensorHandle>(outputTensorInfo);
2182 
2183  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2184 
2185  // Adds connections.
2186  // connect constant to output
2187  Connect(constant, output, outputTensorInfo, 0, 0);
2188 
2189  // create tensor handles
2190  CreateTensorHandles(graph, factory);
2191 
2192  // create Constant workload"
2193  auto workloadConstant = MakeAndCheckWorkload<ConstantWorkload>(*constant, factory);
2194  CHECK(workloadConstant);
2195 
2196  return workloadConstant;
2197 }
2198 
2199 template <typename PreluWorkload>
2200 std::unique_ptr<PreluWorkload> CreatePreluWorkloadTest(armnn::IWorkloadFactory& factory,
2201  armnn::Graph& graph,
2202  const armnn::TensorShape& inputShape,
2203  const armnn::TensorShape& alphaShape,
2204  const armnn::TensorShape& outputShape,
2205  armnn::DataType dataType)
2206 {
2207  // Creates the PReLU layer
2208  Layer* const layer = graph.AddLayer<PreluLayer>("prelu");
2209  CHECK(layer != nullptr);
2210 
2211  // Creates extra layers
2212  Layer* const input = graph.AddLayer<InputLayer> (0, "input");
2213  Layer* const alpha = graph.AddLayer<InputLayer> (1, "alpha");
2214  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2215  CHECK(input != nullptr);
2216  CHECK(alpha != nullptr);
2217  CHECK(output != nullptr);
2218 
2219  // Connects up
2220  armnn::TensorInfo inputTensorInfo (inputShape, dataType);
2221  armnn::TensorInfo alphaTensorInfo (alphaShape, dataType);
2222  armnn::TensorInfo outputTensorInfo(outputShape, dataType);
2223  Connect(input, layer, inputTensorInfo, 0, 0);
2224  Connect(alpha, layer, alphaTensorInfo, 0, 1);
2225  Connect(layer, output, outputTensorInfo, 0, 0);
2226  CreateTensorHandles(graph, factory);
2227 
2228  // Makes the workload and checks it
2229  auto workload = MakeAndCheckWorkload<PreluWorkload>(*layer, factory);
2230 
2231  PreluQueueDescriptor queueDescriptor = workload->GetData();
2232  CHECK(queueDescriptor.m_Inputs.size() == 2);
2233  CHECK(queueDescriptor.m_Outputs.size() == 1);
2234 
2235  // Returns so we can do extra, backend-specific tests.
2236  return workload;
2237 }
2238 
2239 template <typename SpaceToDepthWorkload, armnn::DataType DataType>
2240 std::unique_ptr<SpaceToDepthWorkload> CreateSpaceToDepthWorkloadTest(armnn::IWorkloadFactory& factory,
2241  armnn::Graph& graph)
2242 {
 2243  SpaceToDepthDescriptor desc;
 2244  desc.m_BlockSize = 2;
2245  Layer* const layer = graph.AddLayer<SpaceToDepthLayer>(desc, "spaceToDepth");
2246 
2247  // Creates extra layers.
2248  Layer* const input = graph.AddLayer<InputLayer>(0, "input");
2249  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2250 
2251  // Connects up.
2252  armnn::TensorInfo inputTensorInfo({ 1, 2, 2, 1 }, DataType);
2253  armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 4 }, DataType);
2254 
2255  Connect(input, layer, inputTensorInfo);
2256  Connect(layer, output, outputTensorInfo);
2257 
2258  CreateTensorHandles(graph, factory);
2259 
2260  // Makes the workload and checks it.
2261  auto workload = MakeAndCheckWorkload<SpaceToDepthWorkload>(*layer, factory);
2262 
2263  SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
2264  CHECK(queueDescriptor.m_Inputs.size() == 1);
2265  CHECK(queueDescriptor.m_Outputs.size() == 1);
2266 
2267  return workload;
2268 }
2269 
2270 template <typename StackWorkload, armnn::DataType DataType>
2271 std::unique_ptr<StackWorkload> CreateStackWorkloadTest(armnn::IWorkloadFactory& factory,
2272  armnn::Graph& graph,
2273  const armnn::TensorShape& inputShape,
2274  const armnn::TensorShape& outputShape,
2275  unsigned int axis,
2276  unsigned int numInputs)
2277 {
2278  armnn::TensorInfo inputTensorInfo(inputShape, DataType);
2279  armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2280 
2281  // Constructs the Stack layer.
2282  armnn::StackDescriptor descriptor(axis, numInputs, inputShape);
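 // For reference: stacking numInputs tensors of shape inputShape along 'axis'
 // inserts a new dimension of size numInputs at that axis, e.g. two {3, 4, 5}
 // inputs stacked on axis 0 produce a {2, 3, 4, 5} output.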
2283  Layer* const stackLayer = graph.AddLayer<StackLayer>(descriptor, "stack");
2284  CHECK(stackLayer != nullptr);
2285 
2286  // Constructs layer inputs and output.
2287  std::vector<Layer*> inputs;
2288  for (unsigned int i=0; i<numInputs; ++i)
2289  {
2290  inputs.push_back(graph.AddLayer<InputLayer>(
2291  static_cast<int>(i),
2292  ("input" + std::to_string(i)).c_str()
2293  ));
2294  CHECK(inputs[i] != nullptr);
2295  }
2296  Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2297  CHECK(output != nullptr);
2298 
2299  // Adds connections.
2300  for (unsigned int i=0; i<numInputs; ++i)
2301  {
2302  Connect(inputs[i], stackLayer, inputTensorInfo, 0, i);
2303  }
2304  Connect(stackLayer, output, outputTensorInfo, 0, 0);
2305 
2306  CreateTensorHandles(graph, factory);
2307 
2308  auto stackWorkload = MakeAndCheckWorkload<StackWorkload>(*stackLayer, factory);
2309  StackQueueDescriptor queueDescriptor = stackWorkload->GetData();
2310  CHECK(queueDescriptor.m_Inputs.size() == numInputs);
2311  CHECK(queueDescriptor.m_Outputs.size() == 1);
2312 
2313  return stackWorkload;
2314 }
2315 
 2316 } // Anonymous namespace