ArmNN 20.05: Conv2dTestImpl.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "Conv2dTestImpl.hpp"
7 
8 #include <QuantizeHelper.hpp>
 9 #include <armnnUtils/TensorUtils.hpp>
 10 
 11 #include <armnn/utility/IgnoreUnused.hpp>
 12 #include <armnnUtils/DataLayoutIndexed.hpp>
13 #include <armnnUtils/Permute.hpp>
14 
 15 #include <backendsCommon/CpuTensorHandle.hpp>
 16 
 17 #include <backendsCommon/test/DataLayoutUtils.hpp>
 18 #include <backendsCommon/test/TensorCopyUtils.hpp>
 19 #include <backendsCommon/test/WorkloadTestUtils.hpp>
 20 
21 #include <test/TensorHelpers.hpp>
22 
23 #include <boost/numeric/conversion/cast.hpp>
24 
25 #include <string>
26 
27 //
28 // Static data
29 //
30 
31 // 2-channel bias used by a number of Conv2d tests.
32 static std::vector<float> Bias2({0, 2});
33 
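// 4-channel bias used by a number of Conv2d tests.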
34 static std::vector<float> Bias4({1, 2, 3, 4});
35 
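// 8-channel bias used by a number of Conv2d tests.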
36 static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
37 
38 // 3-channel 16x8 image used as common input data for a number of Conv2d tests.
39 static std::vector<float> ConvInput3x8x16({
40  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
41  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
42  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
43  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
44  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
45  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
46  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
47  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
48  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
57  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
58  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
64 });
65 
66 using namespace armnnUtils;
67 
68 //
69 // Helper templates
70 //
71 
72 // Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
73 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
74 boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
75 {
76  if(biasEnabled)
77  {
78  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
79  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
80  return bias;
81  }
82  else
83  {
84  return boost::multi_array<T, 1>();
85  }
86 }
87 
88 // Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
89 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
90 boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
91 {
92  if(biasEnabled)
93  {
94  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
95  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
96  return bias;
97  }
98  else
99  {
100  return boost::multi_array<T, 1>();
101  }
102 }
103 
104 // Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
105 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
106 boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
107 {
108  if(biasEnabled)
109  {
 110  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
111  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
112  return bias;
113  }
114  else
115  {
116  return boost::multi_array<T, 1>();
117  }
118 }
119 
 120 // Helper template that returns Bias2, Bias4 or Bias8 depending on the number of output channels, or an empty vector if bias is disabled.
121 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
122 boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
123 {
124  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
125  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
126  const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
127 
128  switch (outputChannels)
129  {
130  case 2:
131  default:
132  {
133  return GetBias2<ArmnnType>(biasEnabled, qScale);
134  }
135  case 4:
136  {
137  return GetBias4<ArmnnType>(biasEnabled, qScale);
138  }
139  case 8:
140  {
141  return GetBias8<ArmnnType>(biasEnabled, qScale);
142  }
143  }
144 }
145 
146 //
147 // Implementation templates
148 //
149 
150 // Mapping from input type to bias type for fully connected layers.
151 // float => float, uint8_t => int32_t
152 template<typename T>
153 struct FullyConnectedBiasTypeForInputType;
154 
155 template<>
156 struct FullyConnectedBiasTypeForInputType<float>
157 {
158  using Type = float;
159 };
160 
161 template<>
162 struct FullyConnectedBiasTypeForInputType<uint8_t>
163 {
164  using Type = int32_t;
165 };
166 
167 // Modifies a std::vector in-place using a specified bias.
168 template<typename T, typename B>
169 void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
170  const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
171 {
172  ARMNN_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
173  "Invalid type and parameter combination.");
174  ARMNN_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
175  "Invalid type and parameter combination.");
176 
177  // Note we need to dequantize and re-quantize the image value and the bias.
178  for (uint32_t i = 0; i < bias.size(); ++i)
179  {
180  float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
181  for (uint32_t y = 0; y < h; ++y)
182  {
183  for (uint32_t x = 0; x < w; ++x)
184  {
185  uint32_t offset = (i * h + y) * w + x;
186  ARMNN_ASSERT(offset < v.size());
187  T& outRef = v[offset];
188  float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
189  outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
190  }
191  }
192  }
193 }
194 
195 //
196 // Convolution2d implementations
197 //
198 
 199 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
 200          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
 201 LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
 202  armnn::IWorkloadFactory& workloadFactory,
 203  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
204  const boost::multi_array<T, 4>& originalInput,
205  const boost::multi_array<T, 4>& originalKernel,
206  const boost::multi_array<B, 1>& bias,
207  const boost::multi_array<T, 4>& originalOutputExpected,
208  float qScale,
 209  int32_t qOffset,
 210  const armnn::DataLayout layout,
211  uint32_t padLeft = 0,
212  uint32_t padTop = 0,
213  uint32_t padRight = 0,
214  uint32_t padBottom = 0,
215  uint32_t strideX = 1,
216  uint32_t strideY = 1,
217  uint32_t dilationX = 1,
218  uint32_t dilationY = 1)
219 {
220  armnn::IgnoreUnused(memoryManager);
221  unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
222  unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
223  unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
224  unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
225 
226  unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
227  unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
228  unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
229  unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
230 
231  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
232  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
233  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
234  unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
235 
236  bool biasEnabled = bias.size() > 0;
237 
238  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
239  ARMNN_ASSERT(inputNum == 1);
240  ARMNN_ASSERT(outputNum == 1);
241 
242  // If a bias is used, its size must equal the number of output channels.
243  ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
244 
245 
246  // Note these tensors will use two (identical) batches.
247  armnn::TensorInfo inputTensorInfo =
248  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
249  armnn::TensorInfo outputTensorInfo =
250  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
251  armnn::TensorInfo kernelDesc =
252  armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
253  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
254 
255  // Set quantization parameters if the requested type is a quantized type.
256  if(armnn::IsQuantizedType<T>())
257  {
258  inputTensorInfo.SetQuantizationScale(qScale);
259  inputTensorInfo.SetQuantizationOffset(qOffset);
260  outputTensorInfo.SetQuantizationScale(qScale);
261  outputTensorInfo.SetQuantizationOffset(qOffset);
262  kernelDesc.SetQuantizationScale(qScale);
263  kernelDesc.SetQuantizationOffset(qOffset);
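  // For quantized types the bias scale is the product of the input and weight scales (both qScale here), with zero offset.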
264  biasDesc.SetQuantizationScale(qScale*qScale);
265  biasDesc.SetQuantizationOffset(0);
266  }
267 
268  LayerTestResult<T, 4> ret(outputTensorInfo);
269 
270  // Construct input data - two batches of the same input image.
271  std::vector<T> inputImage;
272  inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
273  std::vector<T> inputData;
274  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
275  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
276 
 277  // At this point, permute the input data to NHWC if required; { 0, 3, 1, 2 } maps each NCHW dimension to its NHWC position.
278  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
279  if (layout == armnn::DataLayout::NHWC)
280  {
281  std::vector<T> tmp(inputData.size());
282  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
283  inputData = tmp;
284  }
285 
286  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
287 
288  std::vector<T> outputImage;
289  outputImage.assign(originalOutputExpected.data(),
290  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
291 
292  // Apply bias to output image if it is enabled.
293  if(biasEnabled)
294  {
295  std::vector<T> biasV;
296  biasV.assign(bias.data(), bias.data() + outputChannels);
297  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
298  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
299  outputWidth, outputHeight);
300  }
301 
302  // Construct expected output data - two identical images.
303  std::vector<T> outputData;
304  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
305  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
306 
 307  // At this point, permute the expected output to NHWC if required.
308  if (layout == armnn::DataLayout::NHWC)
309  {
310  std::vector<T> tmp(outputData.size());
311  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
312  outputData = tmp;
313  }
314  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
315 
316  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
317  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
318 
 319  armnn::Convolution2dQueueDescriptor data;
 320  armnn::WorkloadInfo info;
 321  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
322  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
323  // Permute the kernel if necessary
324  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
325  if (layout == armnn::DataLayout::NHWC)
326  {
327  armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
328  }
329  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
330 
331  if(biasEnabled)
332  {
333  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
334  }
335 
336  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
337  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
338 
339  data.m_Weight = &weightsTensor;
340  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
341  data.m_Parameters.m_StrideX = strideX;
342  data.m_Parameters.m_StrideY = strideY;
343  data.m_Parameters.m_PadLeft = padLeft;
344  data.m_Parameters.m_PadRight = padRight;
345  data.m_Parameters.m_PadTop = padTop;
346  data.m_Parameters.m_PadBottom = padBottom;
347  data.m_Parameters.m_BiasEnabled = biasEnabled;
348  data.m_Parameters.m_DataLayout = layout;
349  data.m_Parameters.m_DilationX = dilationX;
350  data.m_Parameters.m_DilationY = dilationY;
351 
352  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
353  inputHandle->Allocate();
354  outputHandle->Allocate();
355 
356  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
357 
358  ExecuteWorkload(*workload, memoryManager);
359 
360  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
361 
362  return ret;
363 }
364 
 365 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
 366          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>,
 367          armnn::DataType OutType = ArmnnType, typename O = armnn::ResolveType<OutType>>
 368 LayerTestResult<O, 4> SimpleConvolution2dNhwcTestImpl(
 369  armnn::IWorkloadFactory& workloadFactory,
 370  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
371  const boost::multi_array<T, 4>& input,
372  const boost::multi_array<T, 4>& kernel,
373  const boost::multi_array<B, 1>& bias,
374  const boost::multi_array<O, 4>& outputExpected,
375  const armnn::DataLayout dataLayout,
376  float qScale,
377  int32_t qOffset,
378  uint32_t padLeft = 1,
379  uint32_t padTop = 1,
380  uint32_t padRight = 1,
381  uint32_t padBottom = 1,
382  uint32_t strideX = 1,
383  uint32_t strideY = 1)
384 {
385  armnn::IgnoreUnused(qScale, qOffset);
386  unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
387  unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
388  unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
389  unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
390 
391  unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
392  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
393  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
394  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
395 
396  unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
397  unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
398  unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
399  unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
400 
401  bool biasEnabled = bias.size() > 0;
402 
403  // Creates the tensors.
404  armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
405  armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
406  OutType);
407  armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
408  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
409 
410  // Construct the input data.
411  std::vector<T> inputData;
412  inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
413  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
414 
415  // Construct the output data, with bias applied, as appropriate.
416  std::vector<O> outputData;
417  outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
418 
419  LayerTestResult<O, 4> ret(outputTensorInfo);
420  ret.outputExpected = MakeTensor<O, 4>(outputTensorInfo, outputData);
421 
422  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
423  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
424 
425  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
426  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
427 
428  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
429 
 430  armnn::Convolution2dQueueDescriptor data;
 431 
432  data.m_Weight = &weightsTensor;
433  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
434  data.m_Parameters.m_StrideX = strideX;
435  data.m_Parameters.m_StrideY = strideY;
436  data.m_Parameters.m_PadLeft = padLeft;
437  data.m_Parameters.m_PadRight = padRight;
438  data.m_Parameters.m_PadTop = padTop;
439  data.m_Parameters.m_PadBottom = padBottom;
440  data.m_Parameters.m_BiasEnabled = biasEnabled;
441  data.m_Parameters.m_DataLayout = dataLayout;
442 
 443  armnn::WorkloadInfo info;
 444  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
445  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
446 
447  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
448  inputHandle->Allocate();
449  outputHandle->Allocate();
450 
451  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
452 
453  ExecuteWorkload(*workload, memoryManager);
454 
455  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
456 
457  return ret;
458 }
459 
 460 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
 461 LayerTestResult<T, 4> Convolution1dTestImpl(
 462  armnn::IWorkloadFactory& workloadFactory,
 463  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
464  float qScale,
465  int32_t qOffset,
466  bool biasEnabled)
 467 {
 468  using B = armnn::ResolveType<ArmnnBType>;
469  // Until we have a specialist 1D convolution layer, we can fake one using
470  // 2D convolution with the final dimension set to 1.
471  // I don't anticipate this being particularly slow, given that convolution is implemented
472  // as a matrix multiplication, at which point dimension doesn't matter.
473 
474  unsigned int batchSize = 1;
475  unsigned int inputChannels = 2;
476  unsigned int outputChannels = 3;
477  unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
478  unsigned int kernelSize = 3;
479  unsigned int padSize = 2;
480  unsigned int stride = 1;
481  unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.
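  // With these values: (5 + 2 * 2 - 3 + 1) / 1 = 7.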
482 
483  armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
484  armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
485  armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
486  armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
487 
488  // Set quantization parameters if the requested type is a quantized type.
489  if(armnn::IsQuantizedType<T>())
490  {
491  inputInfo.SetQuantizationScale(qScale);
492  inputInfo.SetQuantizationOffset(qOffset);
493  outputInfo.SetQuantizationScale(qScale);
494  outputInfo.SetQuantizationOffset(qOffset);
495  kernelInfo.SetQuantizationScale(qScale);
496  kernelInfo.SetQuantizationOffset(qOffset);
497  biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
498  biasInfo.SetQuantizationOffset(0);
499  }
500 
501  std::vector<T> inputData = QuantizedVector<T>(
502  {
503  5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
504  -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
505  },
506  inputInfo.GetQuantizationScale(),
507  inputInfo.GetQuantizationOffset());
508 
509  std::vector<T> kernelData = QuantizedVector<T>(
510  {
511  1.0f, 0.0f, 0.0f,
512  0.0f, 2.0f, -1.5f,
513 
514  0.0f, 0.0f, 0.0f,
515  0.2f, 0.2f, 0.2f,
516 
517  0.5f, 0.0f, 0.5f,
518  0.0f, -1.0f, 0.0f
519  },
520  kernelInfo.GetQuantizationScale(),
521  kernelInfo.GetQuantizationOffset());
522 
523  std::vector<B> biasData =
524  QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());
525 
526  std::vector<T> outputData = QuantizedVector<T>(
527  {
528  4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
529  -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
530  2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
531  },
532  outputInfo.GetQuantizationScale(),
533  outputInfo.GetQuantizationOffset());
534 
535  // Optionally apply bias to output image.
536  if(biasEnabled)
537  {
538  ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
539  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
540  1, outputSize);
541  }
542 
543  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
544  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
545 
 546  armnn::Convolution2dQueueDescriptor data;
 547  armnn::WorkloadInfo info;
 548  armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
549  armnn::ScopedCpuTensorHandle biasTensor(biasInfo);
550 
551  AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
552  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
553 
554  AddInputToWorkload(data, info, inputInfo, inputHandle.get());
555  AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
556 
557  data.m_Weight = &weightsTensor;
558  data.m_Bias = &biasTensor;
559  data.m_Parameters.m_StrideX = 1;
560  data.m_Parameters.m_StrideY = stride;
561  data.m_Parameters.m_PadLeft = 0;
562  data.m_Parameters.m_PadRight = 0;
563  data.m_Parameters.m_PadTop = padSize;
564  data.m_Parameters.m_PadBottom = padSize;
565  data.m_Parameters.m_BiasEnabled = biasEnabled;
566 
567  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
568  inputHandle->Allocate();
569  outputHandle->Allocate();
570 
571  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
572 
573  ExecuteWorkload(*workload, memoryManager);
574 
575  // Output
576  LayerTestResult<T,4> ret(outputInfo);
577  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
578  ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
579  return ret;
580 }
581 
 582 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 583 LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
 584  armnn::IWorkloadFactory& workloadFactory,
 585  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
586  float qScale,
587  int32_t qOffset,
588  bool biasEnabled,
589  armnn::DataLayout dataLayout)
590 {
591  armnn::IgnoreUnused(biasEnabled);
 592  // Use a single-batch 1-channel 4x3 (WxH) image in NHWC.
593 
594  armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
595  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
596  {
597  1, 5, 2, 3,
598  8, 7, 3, 6,
599  3, 3, 9, 1
600  });
601 
602 
 603  // Use a single 1-channel 3x3 kernel (NHWC).
604  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
605  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
606  4, 5, 6,
607  0, 0, 0,
608  3, 2, 1
609  });
610 
 611  // Expected output is 1 batch of a 1-channel 4x3 (WxH) image.
612  armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
613 
614  const std::vector<float> outputData =
615  {
616  23, 41, 33, 21,
617  44, 65, 76, 52,
618  82, 85, 79, 42
619  };
620 
621  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
622 
623  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
624  workloadFactory,
625  memoryManager,
626  input,
627  kernel,
628  boost::multi_array<T, 1>(),
629  expectedOutput,
630  dataLayout,
631  qScale,
632  qOffset);
633 }
634 
 635 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 636 LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
 637  armnn::IWorkloadFactory& workloadFactory,
 638  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
639  float qScale,
640  int32_t qOffset,
641  bool biasEnabled,
642  const armnn::DataLayout& dataLayout)
643 {
644  armnn::IgnoreUnused(biasEnabled);
645 
646  // Input is a single-batch, 1 channel, 5x5 image.
647  armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
648  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
649  {
650  1, 5, 2, 3, 5,
651  8, 7, 3, 6, 3,
652  3, 3, 9, 1, 9,
653  4, 1, 8, 1, 3,
654  6, 8, 1, 9, 2
655  });
656 
657  // Use a 3x3 kernel.
658  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
659  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
660  {
661  4, 5, 6,
662  0, 0, 0,
663  3, 2, 1
664  });
665 
666  // Expected output is a single-batch, 1 channel, 3x3 image.
667  armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
668 
669  const std::vector<T> outputData =
670  {
671  23, 33, 24,
672  91, 99, 48,
673  26, 50, 19
674  };
675 
676  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
677 
678  uint32_t padLeft = 1;
679  uint32_t padTop = 1;
680  uint32_t padRight = 1;
681  uint32_t padBottom = 1;
682  uint32_t strideX = 2;
683  uint32_t strideY = 2;
684 
685  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
686  workloadFactory,
687  memoryManager,
688  input,
689  kernel,
690  boost::multi_array<T, 1>(),
691  expectedOutput,
692  dataLayout,
693  qScale,
694  qOffset,
695  padLeft,
696  padTop,
697  padRight,
698  padBottom,
699  strideX,
700  strideY);
701 }
702 
 703 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
 704 LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
 705  armnn::IWorkloadFactory& workloadFactory,
 706  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
707  float qScale,
708  int32_t qOffset,
709  bool biasEnabled,
710  const armnn::DataLayout layout)
711 {
712  // Use common single-batch 3-channel 16x8 image.
713  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
714  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
715 
716  // Use a 2-element batch with 3-channel 3x5 kernels.
717  armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
718  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
719  QuantizedVector<T>({
720  1, 1, 1,
721  1, -1, 1,
722  1, 1, 1,
723  1, 1, 1,
724  1, 1, 1,
725 
726  0, 0, 0,
727  0, 0, 0,
728  0, 0, 0,
729  0, 0, 0,
730  0, 0, 0,
731 
732  2, 2, 2,
733  2, 2, 2,
734  2, 2, 2,
735  2, 2, 2,
736  2, 2, 2,
737 
738 
739  0, 0, 0,
740  0, 0, 0,
741  0, 0, 0,
742  0, 0, 0,
743  0, 0, 0,
744 
745  1, 1, 1,
746  1, 1, 1,
747  1, 1, 1,
748  1, 1, 1,
749  1, 1, 1,
750 
751  0, 0, 0,
752  0, 0, 0,
753  0, 0, 0,
754  0, 0, 0,
755  0, 0, 0
756  },
757  qScale, qOffset)));
758 
 759  // Expected output is 1 batch of a 2-channel 14x4 (WxH) image.
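  // With a 5x3 (HxW) kernel, zero padding and stride 1: output height = 8 - 5 + 1 = 4, output width = 16 - 3 + 1 = 14.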
760  armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
761  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
762  QuantizedVector<T>({
763  -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
764  -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
765  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
766  -23.5f, -23.5f, -23.5f,
767  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
768  -23.5f, -23.5f, -23.5f,
769 
770  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
771  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
772  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
773  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
774  },
775  qScale, qOffset)));
776 
777  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
778  workloadFactory,
779  memoryManager,
780  input,
781  kernel,
782  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
783  expectedOutput,
784  qScale,
785  qOffset,
786  layout);
787 }
788 
789 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
790  typename T = armnn::ResolveType<ArmnnType>>
 791 LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
 792  armnn::IWorkloadFactory& workloadFactory,
 793  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
794  float qScale,
795  int32_t qOffset,
796  bool biasEnabled,
797  const armnn::DataLayout layout)
798 {
799  // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
800 
801  // Use common single-batch 3-channel 16x8 image.
802  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
803  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
804 
805  // Use a 2-element batch of 3-channel 3x3 kernels.
806  armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
807  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
808  QuantizedVector<T>({
809  1, 1, 1,
810  1, -1, 1,
811  1, 1, 1,
812 
813  0, 0, 0,
814  0, 0, 0,
815  0, 0, 0,
816 
817  2, 2, 2,
818  2, 2, 2,
819  2, 2, 2,
820 
821 
822  0, 0, 0,
823  0, 0, 0,
824  0, 0, 0,
825 
826  1, 1, 1,
827  1, 1, 1,
828  1, 1, 1,
829 
830  0, 0, 0,
831  0, 0, 0,
832  0, 0, 0
833  },
834  qScale, qOffset)));
835 
836  // Expected output is 1 batch of a 2-channel 14x6 image.
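  // With a 3x3 kernel, zero padding and stride 1: output height = 8 - 3 + 1 = 6, output width = 16 - 3 + 1 = 14.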
837  armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
838  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
839  QuantizedVector<T>({
840  -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
841  -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
842  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
843  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
844  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
845  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
846 
847  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
848  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
849  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
850  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
851  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
852  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
853  },
854  qScale, qOffset)));
855 
856  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
857  workloadFactory,
858  memoryManager,
859  input,
860  kernel,
861  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
862  expectedOutput,
863  qScale,
864  qOffset,
865  layout);
866 }
867 
868 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
869  typename T = armnn::ResolveType<ArmnnType>>
 870 LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
 871  armnn::IWorkloadFactory& workloadFactory,
 872  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
873  const armnn::DataLayout layout,
874  float qScale,
875  int32_t qOffset)
876 {
877  // Use a single-batch 1-channel 3x3 image as input.
878  armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
879  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
880  QuantizedVector<T>({
881  11,21,31,
882  12,22,32,
883  13,23,33
884  },
885  qScale, qOffset)));
886 
887  // Use 1 batch of a 1-channel 2x2 kernel.
888  armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
889  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
890  QuantizedVector<T>({
891  -11,-21,
892  -12,-22,
893  },
894  qScale, qOffset)));
895 
896 // Expected output is 1 batch of a 1-channel 6x8 image.
897 // Manually calculated like this:
898 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
899 //[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
900 //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
901 //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
902 //[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
903 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
904 //[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
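// With a 2x2 kernel, stride 1 and padding (left, top, right, bottom) = (1, 2, 3, 4):
// output width = 3 + 1 + 3 - 2 + 1 = 6, output height = 3 + 2 + 4 - 2 + 1 = 8.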
905  armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
906  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
907  QuantizedVector<T>({
908  0, 0, 0, 0, 0, 0,
909  -242, -594, -934, -372, 0, 0,
910  -495, -1190, -1850, -725, 0, 0,
911  -538, -1256, -1916, -748, 0, 0,
912  -273, -626, -946, -363, 0, 0,
913  0, 0, 0, 0, 0, 0,
914  0, 0, 0, 0, 0, 0,
915  0, 0, 0, 0, 0, 0
916  },
917  qScale, qOffset)));
918 
919  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
920  workloadFactory,
921  memoryManager,
922  input,
923  kernel,
924  GetBias2<ArmnnBType>(false, qScale * qScale),
925  expectedOutput,
926  qScale,
927  qOffset,
928  layout,
929  1, // Padding left.
930  2, // Padding top.
931  3, // Padding right.
932  4); // Padding bottom.
933 }
934 
935 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
936  typename T = armnn::ResolveType<ArmnnType>>
 937 LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
 938  armnn::IWorkloadFactory& workloadFactory,
 939  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
940  const armnn::DataLayout layout,
941  float qScale,
942  int32_t qOffset)
943 {
944  // Use a single-batch 1-channel 5x5 image as input.
945  armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
946  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
947  QuantizedVector<T>({
948  11,21,31,41,51,
949  12,22,32,42,52,
950  13,23,33,43,53,
951  14,24,34,44,54,
952  15,25,35,45,55,
953  }, qScale, qOffset)));
954 
955  // Use 1 batch of a 1-channel 4x4 kernel.
956  armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
957  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
958  QuantizedVector<T>({
959  -11,-21,-31,-41,
960  -12,-22,-32,-42,
961  -13,-23,-33,-43,
962  -14,-24,-34,-44,
963  },
964  qScale, qOffset)));
965 
966  // Expected output is 1 batch of a 1-channel 5x5 image.
967  armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
968  std::vector<T> myVec(outputDesc.GetNumElements(), 0);
969  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
970  QuantizedVector<T>({
971  -7140, -10580, -13940, -9300, -5230,
972  -9590, -14120, -18520, -12290, -6860,
973  -9980, -14560, -18960, -12560, -7000,
974  -7518, -10904, -14144, -9318, -5152,
975  -5032, -7256, -9376, -6142, -3368,
976  },
977  qScale, qOffset)));
978 
979  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
980  workloadFactory,
981  memoryManager,
982  input,
983  kernel,
984  GetBias2<ArmnnBType>(false, qScale * qScale),
985  expectedOutput,
986  qScale,
987  qOffset,
988  layout,
989  1, // Padding left.
990  1, // Padding top.
991  2, // Padding right.
992  2); // Padding bottom.
993 }
994 
995 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
 996 LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
 997  armnn::IWorkloadFactory& workloadFactory,
 998  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
999  const std::vector<float>& inputNoQuantizedValues,
1000  armnn::TensorInfo& inputTensorInfo,
1001  const std::vector<float>& kernelNoQuantizedValues,
1002  armnn::TensorInfo& kernelTensorInfo,
1003  const std::vector<float>& outputExpectedNoQuantizedValues,
1004  armnn::TensorInfo& outputTensorInfo,
1005  uint32_t dilationX,
 1006  uint32_t dilationY,
 1007  const armnn::DataLayout layout,
1008  uint32_t padLeft = 0,
1009  uint32_t padTop = 0,
1010  uint32_t padRight = 0,
1011  uint32_t padBottom = 0,
1012  uint32_t strideX = 1,
1013  uint32_t strideY = 1,
1014  bool biasEnabled = false
1015 )
1016 {
1017  float qScale;
1018  int32_t qOffset;
1019  switch (ArmnnType)
1020  {
 1021  case armnn::DataType::QAsymmS8:
 1022  case armnn::DataType::QAsymmU8:
 1023  {
1024  qScale = 0.1f;
1025  qOffset = 128;
1026  break;
1027  }
 1028  case armnn::DataType::QSymmS16:
 1029  {
1030  qScale = 0.1f;
1031  qOffset = 0;
1032  break;
1033  }
1035  default:
1036  {
1037  qScale = 0.f;
1038  qOffset = 0;
1039  break;
1040  }
1041  }
1042 
1043  inputTensorInfo.SetQuantizationScale(qScale);
1044  inputTensorInfo.SetQuantizationOffset(qOffset);
1045  kernelTensorInfo.SetQuantizationScale(qScale);
1046  kernelTensorInfo.SetQuantizationOffset(qOffset);
1047  outputTensorInfo.SetQuantizationScale(qScale);
1048  outputTensorInfo.SetQuantizationOffset(qOffset);
1049 
1050  auto input = MakeTensor<T, 4>(inputTensorInfo,
1051  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
1052  inputTensorInfo.GetQuantizationScale(),
1053  inputTensorInfo.GetQuantizationOffset())));
1054  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
1055  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
1056  kernelTensorInfo.GetQuantizationScale(),
1057  kernelTensorInfo.GetQuantizationOffset())));
1058  auto expectedOutput =
1059  MakeTensor<T, 4>(outputTensorInfo,
1060  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
1061  outputTensorInfo.GetQuantizationScale(),
1062  outputTensorInfo.GetQuantizationOffset())));
1063 
1064  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
1065  workloadFactory,
1066  memoryManager,
1067  input,
1068  kernel,
1069  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
1070  expectedOutput,
1071  qScale,
1072  qOffset,
1073  layout,
1074  padLeft,
1075  padTop,
1076  padRight,
1077  padBottom,
1078  strideX,
1079  strideY,
1080  dilationX,
1081  dilationY);
1082 }
1083 
1084 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
 1085 LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
 1086  armnn::IWorkloadFactory& workloadFactory,
 1087  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1088  bool biasEnabled,
1089  const armnn::DataLayout layout)
1090 {
1091  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1092  std::vector<float> inputNoQuantizedValues =
1093  {
1094  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1095  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1096  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1097  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1098  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1099  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1100  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1101  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1102  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1103  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1104  };
1105 
1106  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
1107  std::vector<float> kernelNoQuantizedValues =
1108  {
1109  1, 2, 3,
1110  4, 5, 6,
1111  7, 8, 9
1112  };
1113 
1114  // Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
1115  // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
1116  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1117  std::vector<float> outputExpectedNoQuantizedValues =
1118  {
1119  6., 5., 5., 5.,
1120  6., 5., 5., 5.,
1121  6., 5., 5., 5.,
1122  3., 2., 2., 2.
1123  };
1124 
1125  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1126  workloadFactory,
1127  memoryManager,
1128  inputNoQuantizedValues,
1129  inputTensorInfo,
1130  kernelNoQuantizedValues,
1131  kernelTensorInfo,
1132  outputExpectedNoQuantizedValues,
1133  outputTensorInfo,
1134  3,
1135  3,
1136  layout,
1137  biasEnabled);
1138 }
1139 
1140 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
 1141 LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
 1142  armnn::IWorkloadFactory& workloadFactory,
 1143  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1144  bool biasEnabled,
1145  const armnn::DataLayout layout)
1146 {
1147  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
1148  std::vector<float> inputNoQuantizedValues =
1149  {
1150  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1151  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1152  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1153  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1154  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1155  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1156  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1157  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1158  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1159  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1160 
1161  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1162  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1163  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1164  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1165  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1166  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1167  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1168  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1169  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1170  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1171  };
1172 
1173  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
1174  std::vector<float> kernelNoQuantizedValues =
1175  {
1176  1, 2, 3,
1177  4, 5, 6,
1178  7, 8, 9,
1179 
1180  1, 2, 3,
1181  4, 5, 6,
1182  7, 8, 9
1183  };
1184 
1185  // Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
1186  // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
1187  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1188  std::vector<float> outputExpectedNoQuantizedValues =
1189  {
1190  12., 10., 10., 10.,
1191  12., 10., 10., 10.,
1192  12., 10., 10., 10.,
1193  6., 4., 4., 4.
1194  };
1195 
1196  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1197  workloadFactory,
1198  memoryManager,
1199  inputNoQuantizedValues,
1200  inputTensorInfo,
1201  kernelNoQuantizedValues,
1202  kernelTensorInfo,
1203  outputExpectedNoQuantizedValues,
1204  outputTensorInfo,
1205  3,
1206  3,
1207  layout,
1208  biasEnabled);
1209 }
1210 
1211 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
 1212 LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
 1213  armnn::IWorkloadFactory &workloadFactory,
 1214  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1215  bool biasEnabled,
1216  const armnn::DataLayout layout)
1217 {
1218  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1219  std::vector<float> inputNoQuantizedValues =
1220  {
1221  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1222  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1223  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1224  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1225  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1226  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1227  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1228  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1229  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1230  1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1231  };
1232 
1233  armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
1234  std::vector<float> kernelNoQuantizedValues =
1235  {
1236  1, 2,
1237  3, 4
1238  };
1239 
 1240  // Since the dilation rate is 2, the effective kernel size is d(K-1)+1 => 2 x (2-1) + 1 = 3,
 1241  // therefore the output will be 4x4: floor((I - K_dilated + 2P) / S) + 1 => floor((10 - 3 + 2) / 3) + 1 = 4,
 1242  // where dilation = d = 2; kernel size = K = 2; input size = I = 10; padding per side = P = 1; stride = S = 3.
1243  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1244  std::vector<float> outputExpectedNoQuantizedValues =
1245  {
1246  4, 7, 7, 3,
1247  6, 10, 10, 4,
1248  6, 10, 10, 4,
1249  2, 3, 3, 1
1250  };
1251  uint32_t padLeft = 1;
1252  uint32_t padTop = 1;
1253  uint32_t padRight = 1;
1254  uint32_t padBottom = 1;
1255 
1256  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1257  workloadFactory,
1258  memoryManager,
1259  inputNoQuantizedValues,
1260  inputTensorInfo,
1261  kernelNoQuantizedValues,
1262  kernelTensorInfo,
1263  outputExpectedNoQuantizedValues,
1264  outputTensorInfo,
1265  2,
1266  2,
1267  layout,
1268  padLeft,
1269  padTop,
1270  padRight,
1271  padBottom,
1272  3,
1273  3,
1274  biasEnabled
1275  );
1276 }
1277 
1278 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 1279 LayerTestResult<T, 4> CompareConvolution2dTestImpl(
 1280  armnn::IWorkloadFactory& workloadFactory,
 1281  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1282  armnn::IWorkloadFactory& refWorkloadFactory)
1283 {
1284  unsigned int inputHeight = 8;
1285  unsigned int inputWidth = 16;
1286  unsigned int inputChannels = 3;
1287  unsigned int inputNum = 5;
1288 
1289  unsigned int kernelHeight = 3;
1290  unsigned int kernelWidth = 3;
1291 
1292  unsigned int strideX = 2;
1293  unsigned int strideY = 3;
1294  unsigned int padX = 1;
1295  unsigned int padY = 1;
1296 
1297  unsigned int outputNum = inputNum;
1298  unsigned int outputChannels = 2;
1299  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
1300  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
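  // With these values: outputHeight = (8 + 2 - 3 + 3) / 3 = 3 and outputWidth = (16 + 2 - 3 + 2) / 2 = 8.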
1301 
1302  armnn::TensorInfo inputTensorInfo;
1303  armnn::TensorInfo outputTensorInfo;
1304  armnn::TensorInfo kernelDesc;
1305  armnn::TensorInfo biasDesc;
1306 
1307  unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
1308  unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1309  unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1310  unsigned int biasShape[] = {outputChannels};
1311 
1312  inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1313  outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1314  kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1315  biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1316 
1317  LayerTestResult<T,4> ret(outputTensorInfo);
1318 
1319  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
1320  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
1321  auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);
1322 
1323  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1324  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1325 
 1326  armnn::Convolution2dQueueDescriptor data;
 1327  armnn::WorkloadInfo info;
 1328  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1329  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1330 
1331  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1332  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1333 
1334  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1335  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1336  data.m_Weight = &weightsTensor;
1337  data.m_Bias = &biasTensor;
1338  data.m_Parameters.m_StrideX = strideX;
1339  data.m_Parameters.m_StrideY = strideY;
1340  data.m_Parameters.m_PadLeft = padX;
1341  data.m_Parameters.m_PadRight = padX;
1342  data.m_Parameters.m_PadTop = padY;
1343  data.m_Parameters.m_PadBottom = padY;
1344  data.m_Parameters.m_BiasEnabled = true;
1345 
1346  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1347  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
1348 
1349  armnn::Convolution2dQueueDescriptor refData = data;
1350  armnn::WorkloadInfo refInfo = info;
1351  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1352  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1353 
1354  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
1355  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
1356 
1357  outputHandleRef->Allocate();
1358  inputHandleRef->Allocate();
1359 
1360  inputHandle->Allocate();
1361  outputHandle->Allocate();
1362 
1363  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1364  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1365 
1366  ExecuteWorkload(*workload, memoryManager);
1367 
1368  workloadRef->PostAllocationConfigure();
1369  workloadRef->Execute();
1370 
1371  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1372  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1373 
1374  return ret;
1375 }
1376 
 1377 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16Test(
 1378  armnn::IWorkloadFactory& workloadFactory,
 1379  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1380  bool biasEnabled,
1381  const armnn::DataLayout& dataLayout)
1382 {
1383  // BFloat16 input and weight, Float32 output
1384  armnn::IgnoreUnused(biasEnabled);
1385 
1386  // Input is a single-batch, 1 channel, 5x5 image.
1387  armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
1388 
1389  std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1390  {
1391  10.0367984f, // 10.0625
1392  2.0380895f, // 2.03125
1393  15.0420157f, // 15.0625
1394  22.0675631f, // 22.125
1395  8.0938920f, // 8.125
1396  5.0476106f, // 5.0625
1397  80.1035490f, // 80
1398  100.1260370f, // 100
1399  55.0461647f, // 55
1400  120.0883828f, // 120
1401  9.1159540f, // 9.125
1402  90.0498519f, // 90
1403  200.0104630f, // 200
1404  30.0154114f, // 30
1405  75.00137681f, // 75
1406  30.0344238f, // 30
1407  25.0356445f, // 25
1408  130.0495605f, // 130
1409  60.0683594f, // 60
1410  35.0991211f, // 35
1411  8.0461426f, // 8.0625
1412  12.0996094f, // 12.125
1413  98.1269530f, // 98
1414  125.0393066f, // 125
1415  5.103516f // 5.0937
1416  },
1417  1.0f, 0);
1418 
1419  auto input = MakeTensor<armnn::BFloat16, 4>(inputDesc, inputValues);
1420 
1421  // Use a 3x3 kernel.
1422  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1423 
1424  std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1425  {
1426  -0.126184f, // -0.125977
1427  -0.150468f, // -0.150391
1428  -0.101412f, // -0.101562
1429  -0.0586369f,// -0.0585938
1430  -0.0865864f,// -0.0864258
1431  -0.0435089f,// -0.043457
1432  0.0347555f, // 0.034668
1433  0.0323111f, // 0.0322266
1434  0.0385381f // 0.0385742
1435  },
1436  1.0f, 0);
1437 
1438  auto kernel = MakeTensor<armnn::BFloat16, 4>(kernelDesc, kernelValues);
1439 
1440  // Expected output is a single-batch, 1 channel, 3x3 image.
1441  armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
1442 
1443  // Expected output (with results if calculated as FP32 in the comments)
1444  const std::vector<float> outputData =
1445  {
1446  2.296875f, // 2.29240716
1447  5.75f, // 5.75851926
1448  3.78125f, // 3.79855026
1449  -11.625f, // -11.65498118
1450  -47.25f, // -47.27316893
1451  -30.0f, // -30.04771684
1452  -8.25f, // -8.28126168
1453  -43.5f, // -43.46531337
1454  -20.625f // -20.63477281
1455  };
1456 
1457  boost::multi_array<float, 4> expectedOutput = MakeTensor<float, 4>(outputDesc, outputData);
1458 
1459  uint32_t padLeft = 1;
1460  uint32_t padTop = 1;
1461  uint32_t padRight = 1;
1462  uint32_t padBottom = 1;
1463  uint32_t strideX = 2;
1464  uint32_t strideY = 2;
1465 
 1466  return SimpleConvolution2dNhwcTestImpl
 1467  <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
 1468  workloadFactory,
1469  memoryManager,
1470  input,
1471  kernel,
1472  boost::multi_array<float, 1>(),
1473  expectedOutput,
1474  dataLayout,
1475  1.0f,
1476  0,
1477  padLeft,
1478  padTop,
1479  padRight,
1480  padBottom,
1481  strideX,
1482  strideY);
1483 }
1484 
 1485 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16SmallValueTest(
 1486  armnn::IWorkloadFactory& workloadFactory,
 1487  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1488  bool biasEnabled,
1489  const armnn::DataLayout& dataLayout)
1490 {
1491  // BFloat16 input and weight, Float32 output
1492  armnn::IgnoreUnused(biasEnabled);
1493 
1494  // Input is a single-batch, 1 channel, 5x5 image.
1495  armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
1496 
1497  std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1498  {
1499  0.0367984f, // 0.0368652
1500  0.0380895f, // 0.0380859
1501  0.0420157f, // 0.0419922
1502  0.0675631f, // 0.0673828
1503  0.0938920f, // 0.09375
1504  0.0476106f, // 0.0476074
1505  0.1035490f, // 0.103516
1506  0.1260370f, // 0.125977
1507  0.0461647f, // 0.0461426
1508  0.0883828f, // 0.0883789
1509  0.1159540f, // 0.115723
1510  0.0498519f, // 0.0498047
1511  0.0104630f, // 0.010437
1512  0.0154114f, // 0.0154419
1513  0.00137681f, // 0.00137329
1514  0.0344238f, // 0.0344616
1515  0.0356445f, // 0.0355693
1516  0.0495605f, // 0.0495018
1517  0.0683594f, // 0.0683308
1518  0.0991211f, // 0.0988837
1519  0.0461426f, // 0.0461838
1520  0.0996094f, // 0.0997546
1521  0.1269530f, // 0.127099
1522  0.0393066f, // 0.0392791
1523  0.103516f // 0.103641
1524  },
1525  1.0f, 0);
1526 
1527  auto input = MakeTensor<armnn::BFloat16, 4>(inputDesc, inputValues);
1528 
1529  // Use a 3x3 kernel.
1530  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1531 
1532  std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1533  {
1534  -0.126184f, // -0.125977
1535  -0.150468f, // -0.150391
1536  -0.101412f, // -0.101562
1537  -0.0586369f,// -0.0585938
1538  -0.0865864f,// -0.0864258
1539  -0.0435089f,// -0.043457
1540  0.0347555f, // 0.034668
1541  0.0323111f, // 0.0322266
1542  0.0385381f // 0.0385742
1543  },
1544  1.0f, 0);
1545 
1546  auto kernel = MakeTensor<armnn::BFloat16, 4>(kernelDesc, kernelValues);
1547 
1548  // Expected output is a single-batch, 1 channel, 3x3 image.
1549  armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
1550 
1551  // Expected output (with results if calculated as FP32 in the comments)
1552  const std::vector<float> outputData =
1553  {
1554  0.000686645508f, // 0.000685
1555  0.000640869141f, // 0.000639
1556  -0.00759887695f, // -0.007631
1557  -0.02734375f, // -0.027388
1558  -0.0356445312f, // -0.035737
1559  -0.0145874023f, // -0.014568
1560  -0.0170898438f, // -0.017124
1561  -0.0373535156f, // -0.037431
1562  -0.0346679688f // -0.034808
1563  };
1564 
1565  boost::multi_array<float, 4> expectedOutput = MakeTensor<float, 4>(outputDesc, outputData);
1566 
1567  uint32_t padLeft = 1;
1568  uint32_t padTop = 1;
1569  uint32_t padRight = 1;
1570  uint32_t padBottom = 1;
1571  uint32_t strideX = 2;
1572  uint32_t strideY = 2;
1573 
 1574  return SimpleConvolution2dNhwcTestImpl
 1575  <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
 1576  workloadFactory,
1577  memoryManager,
1578  input,
1579  kernel,
1580  boost::multi_array<float, 1>(),
1581  expectedOutput,
1582  dataLayout,
1583  1.0f,
1584  0,
1585  padLeft,
1586  padTop,
1587  padRight,
1588  padBottom,
1589  strideX,
1590  strideY);
1591 }
1592 
1593 //
1594 // DepthwiseConvolution2d implementations
1595 //
1596 
1597 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
 1598          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
 1599 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
 1600  armnn::IWorkloadFactory& workloadFactory,
 1601  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1602  const boost::multi_array<T, 4>& input,
1603  const boost::multi_array<T, 4>& kernel,
1604  const boost::multi_array<B, 1>& bias,
1605  const boost::multi_array<T, 4>& outputExpected,
1606  float qScale,
1607  int32_t qOffset,
1608  const armnn::DataLayout layout,
1609  uint32_t padLeft = 0,
1610  uint32_t padTop = 0,
1611  uint32_t padRight = 0,
1612  uint32_t padBottom = 0,
1613  uint32_t strideX = 1,
1614  uint32_t strideY = 1)
1615 {
1616  unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1617  unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1618  unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1619  unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1620  unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1621  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1622  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1623  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1624  unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1625  unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1626  unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1627  unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1628 
1629  // If a bias is used, its size must equal the number of output channels.
1630  bool biasEnabled = bias.size() > 0;
1631  ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
1632 
1633  // Creates the tensors.
1634  armnn::TensorInfo inputTensorInfo =
1635  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1636  armnn::TensorInfo outputTensorInfo =
1637  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1638  armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1639  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1640 
1641  // Set quantization parameters if the requested type is a quantized type.
1642  if (armnn::IsQuantizedType<T>())
1643  {
1644  inputTensorInfo.SetQuantizationScale(qScale);
1645  inputTensorInfo.SetQuantizationOffset(qOffset);
1646  outputTensorInfo.SetQuantizationScale(qScale);
1647  outputTensorInfo.SetQuantizationOffset(qOffset);
1648  kernelDesc.SetQuantizationScale(qScale);
1649  kernelDesc.SetQuantizationOffset(qOffset);
1650  biasDesc.SetQuantizationScale(qScale*qScale);
1651  biasDesc.SetQuantizationOffset(0);
1652  }
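 // The bias tensor uses the standard integer-convolution convention: accumulators hold input*weight
 // products, so the bias scale is set to inputScale * weightScale (qScale * qScale here) with a zero
 // offset, letting the quantized bias be added directly onto the accumulator.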
1653 
1654  // Construct the input data.
1655  std::vector<T> inputData;
1656  inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1657 
1658  // At this point, permute the input data into NHWC if that layout was requested.
1659  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
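 // Each entry of the permutation vector gives the destination dimension for the corresponding source
 // dimension (N->0, C->3, H->1, W->2), which is how armnnUtils::Permute rearranges NCHW-ordered data
 // into NHWC order.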
1660  if (layout == armnn::DataLayout::NHWC)
1661  {
1662  std::vector<T> tmp(inputData.size());
1663  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1664  inputData = tmp;
1665  }
1666 
1667  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1668 
1669  // Construct the output data, with bias applied, as appropriate.
1670  std::vector<T> outputData;
1671  outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1672  if (biasEnabled)
1673  {
1674  std::vector<T> biasV;
1675  biasV.assign(bias.data(), bias.data() + outputChannels);
1676  ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1677  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1678  outputWidth, outputHeight);
1679  }
1680 
1681  LayerTestResult<T, 4> ret(outputTensorInfo);
1682 
1683  // At this point, permute the expected output into NHWC if that layout was requested.
1684  if (layout == armnn::DataLayout::NHWC)
1685  {
1686  std::vector<T> tmp(outputData.size());
1687  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1688  outputData = tmp;
1689  }
1690 
1691  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1692 
1693  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1694  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1695 
1696  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1697 
1698  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1699 
1700  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1701  if (biasEnabled)
1702  {
1703  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1704  }
1705 
1706  armnn::DepthwiseConvolution2dQueueDescriptor data;
1707  data.m_Weight = &weightsTensor;
1708  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - leaving it unset can be a source of bugs.
1709  data.m_Parameters.m_StrideX = strideX;
1710  data.m_Parameters.m_StrideY = strideY;
1711  data.m_Parameters.m_PadLeft = padLeft;
1712  data.m_Parameters.m_PadRight = padRight;
1713  data.m_Parameters.m_PadTop = padTop;
1714  data.m_Parameters.m_PadBottom = padBottom;
1715  data.m_Parameters.m_BiasEnabled = biasEnabled;
1716  data.m_Parameters.m_DataLayout = layout;
1717 
1718  armnn::WorkloadInfo info;
1719  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1720  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1721 
1722  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1723  inputHandle->Allocate();
1724  outputHandle->Allocate();
1725 
1726  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1727 
1728  ExecuteWorkload(*workload, memoryManager);
1729 
1730  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1731 
1732  return ret;
1733 }
1734 
1735 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1736 LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1737  armnn::IWorkloadFactory& workloadFactory,
1738  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1739  float qScale,
1740  int32_t qOffset,
1741  bool biasEnabled,
1742  const armnn::DataLayout layout)
1743 {
1744  using B = armnn::ResolveType<ArmnnBType>;
1745
1746  unsigned int inputHeight = 3;
1747  unsigned int inputWidth = 3;
1748  unsigned int inputChannels = 2;
1749  unsigned int inputNum = 1;
1750 
1751  unsigned int kernelHeight = 3;
1752  unsigned int kernelWidth = 3;
1753  unsigned int kernelChannels = inputChannels;
1754  unsigned int kernelDepthMultiplier = 1;
1755 
1756  unsigned int outputHeight = 1;
1757  unsigned int outputWidth = 1;
1758  unsigned int outputChannels = kernelChannels;
1759  unsigned int outputNum = inputNum;
1760 
1761  armnn::TensorInfo inputTensorInfo =
1762  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1763  armnn::TensorInfo outputTensorInfo =
1764  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1765  armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1766  ArmnnType);
1767  armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1768 
1769  // Set quantization parameters if the requested type is a quantized type.
1770  if(armnn::IsQuantizedType<T>())
1771  {
1772  inputTensorInfo.SetQuantizationScale(qScale);
1773  inputTensorInfo.SetQuantizationOffset(qOffset);
1774  outputTensorInfo.SetQuantizationScale(qScale);
1775  outputTensorInfo.SetQuantizationOffset(qOffset);
1776  kernelDesc.SetQuantizationScale(qScale);
1777  kernelDesc.SetQuantizationOffset(qOffset);
1778  biasDesc.SetQuantizationScale(qScale*qScale);
1779  biasDesc.SetQuantizationOffset(0);
1780  }
1781  std::vector<T> inputData = std::vector<T>(
1782  QuantizedVector<T>({
1783  1.f, 2.f, 1.f,
1784  2.f, 1.f, 2.f,
1785  1.f, 2.f, 1.f,
1786 
1787  1.f, 2.f, 1.f,
1788  2.f, 1.f, 2.f,
1789  1.f, 2.f, 1.f,
1790  },
1791  inputTensorInfo.GetQuantizationScale(),
1792  inputTensorInfo.GetQuantizationOffset()));
1793 
1794  // At this point, permute the input data into NHWC if that layout was requested.
1795  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1796  if (layout == armnn::DataLayout::NHWC)
1797  {
1798  std::vector<T> tmp(inputData.size());
1799  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1800  inputData = tmp;
1801  }
1802  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1803 
1804  std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1805  biasDesc.GetQuantizationScale(),
1806  biasDesc.GetQuantizationOffset()));
1807 
1808  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1809 
1810  std::vector<T> kernelData = std::vector<T>(
1811  QuantizedVector<T>({
1812  1.f, 0.f, 1.f,
1813  0.f, 0.f, 0.f,
1814  -1.f, 0.f, -1.f,
1815 
1816  1.f, 0.f, 1.f,
1817  0.f, 0.f, 0.f,
1818  -1.f, 0.f, -1.f,
1819  },
1820  kernelDesc.GetQuantizationScale(),
1821  kernelDesc.GetQuantizationOffset()));
1822 
1823  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1824 
1825  // Manually calculated.
1826  std::vector<T> outputImage(
1827  QuantizedVector<T>({ 0.f, 0.f },
1828  outputTensorInfo.GetQuantizationScale(),
1829  outputTensorInfo.GetQuantizationOffset())
1830  );
1831 
1832  // Optionally apply bias to output image.
1833  if(biasEnabled)
1834  {
1835  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1836  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1837  outputWidth, outputHeight);
1838  }
1839 
1840  LayerTestResult<T, 4> ret(outputTensorInfo);
1841  if (layout == armnn::DataLayout::NHWC)
1842  {
1843  std::vector<T> tmp(outputImage.size());
1844  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1845  outputImage = tmp;
1846  }
1847 
1848  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1849 
1850  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1851  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1852 
1853  armnn::DepthwiseConvolution2dQueueDescriptor data;
1854  armnn::WorkloadInfo info;
1855  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1856  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1857 
1858  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1859  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1860 
1861  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1862  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1863 
1864  data.m_Weight = &weightsTensor;
1865  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1866  data.m_Parameters.m_StrideX = 1;
1867  data.m_Parameters.m_StrideY = 1;
1868  data.m_Parameters.m_PadLeft = 0;
1869  data.m_Parameters.m_PadRight = 0;
1870  data.m_Parameters.m_PadTop = 0;
1871  data.m_Parameters.m_PadBottom = 0;
1872  data.m_Parameters.m_BiasEnabled = biasEnabled;
1873  data.m_Parameters.m_DataLayout = layout;
1874 
1875  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1876  inputHandle->Allocate();
1877  outputHandle->Allocate();
1878 
1879  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1880 
1881  ExecuteWorkload(*workload, memoryManager);
1882 
1883  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1884 
1885  return ret;
1886 }
1887 
1888 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1890  armnn::IWorkloadFactory& workloadFactory,
1892  float qScale,
1893  int32_t qOffset,
1894  bool biasEnabled,
1895  const armnn::DataLayout layout)
1896 {
1897  using B = armnn::ResolveType<ArmnnBType>;
1898
1899  unsigned int depthMultiplier = 2;
1900 
1901  unsigned int inputHeight = 8;
1902  unsigned int inputWidth = 16;
1903  unsigned int inputChannels = 2;
1904  unsigned int inputBatchSize = 1;
1905 
1906  unsigned int kernelHeight = 5;
1907  unsigned int kernelWidth = 3;
1908 
1909  unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1910  unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
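 // Worked through with the parameters set further down: (8 - 5 + 1) + 2 = 6 rows (the +2 comes from
 // padTop = padBottom = 1 with strideY = 1) and (16 - 3 + 1) / 2 = 7 columns (no horizontal padding,
 // strideX = 2).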
1911  unsigned int outputChannels = inputChannels * depthMultiplier;
1912  unsigned int outputBatchSize = inputBatchSize;
1913 
1914  armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1915  inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1916  armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1917  outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1918  armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1919  ArmnnType);
1920  armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1921 
1922  // Set quantization parameters if the requested type is a quantized type.
1923  if(armnn::IsQuantizedType<T>())
1924  {
1925  inputTensorInfo.SetQuantizationScale(qScale);
1926  inputTensorInfo.SetQuantizationOffset(qOffset);
1927  outputTensorInfo.SetQuantizationScale(qScale);
1928  outputTensorInfo.SetQuantizationOffset(qOffset);
1929  kernelDesc.SetQuantizationScale(qScale);
1930  kernelDesc.SetQuantizationOffset(qOffset);
1931  biasDesc.SetQuantizationScale(qScale*qScale);
1932  biasDesc.SetQuantizationOffset(0);
1933  }
1934 
1935  // NOTE: originalInputData is in NCHW format
1936  std::vector<T> originalInputData = std::vector<T>(
1937  QuantizedVector<T>({
1938  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1939  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1940  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1941  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1942  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1943  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1944  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1945  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1946  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1947  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1948  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1949  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1950  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1951  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1952  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1953  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1954  },
1955  inputTensorInfo.GetQuantizationScale(),
1956  inputTensorInfo.GetQuantizationOffset()));
1957 
1958  std::vector<T> inputData = originalInputData;
1959  // At this point, permute the input data into NHWC if that layout was requested.
1960  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1961  if (layout == armnn::DataLayout::NHWC)
1962  {
1963  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1964  originalInputData.data(), inputData.data(), sizeof(T));
1965  }
1966  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1967 
1968  std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
1969  biasDesc.GetQuantizationScale(),
1970  biasDesc.GetQuantizationOffset());
1971 
1972  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1973 
1974  std::vector<T> kernelData = std::vector<T>(
1975  QuantizedVector<T>({
1976  1, 1, 1,
1977  1, -1, 1,
1978  1, 1, 1,
1979  1, 1, 1,
1980  1, 1, 1,
1981 
1982  2, 2, 2,
1983  2, 2, 2,
1984  2, 2, 2,
1985  2, 2, 2,
1986  2, 2, 2,
1987 
1988  0, 0, 0,
1989  0, -1, 0,
1990  0, 0, 0,
1991  0, 0, 0,
1992  0, 0, 0,
1993 
1994  0, 0, 0,
1995  0, 0, 0,
1996  0, 1, 0,
1997  0, 0, 0,
1998  0, 0, 0
1999  },
2000  kernelDesc.GetQuantizationScale(),
2001  kernelDesc.GetQuantizationOffset()));
2002 
2003  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
2004 
2005  // Manually calculated.
2006  std::vector<T> originalOutputImage = std::vector<T>(
2007  QuantizedVector<T>({
2008  3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
2009  6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
2010  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
2011  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
2012  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
2013  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
2014 
2015  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2016  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2017  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2018  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2019  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2020  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2021 
2022  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2023  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2024  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2025  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2026  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2027  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2028 
2029  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2030  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2031  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2032  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2033  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2034  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
2035  },
2036  outputTensorInfo.GetQuantizationScale(),
2037  outputTensorInfo.GetQuantizationOffset()));
2038 
2039  // Optionally apply bias to output image.
2040  if(biasEnabled)
2041  {
2042  ApplyBias(originalOutputImage,
2043  outputTensorInfo.GetQuantizationScale(),
2044  outputTensorInfo.GetQuantizationOffset(),
2045  biasV,
2046  biasDesc.GetQuantizationScale(),
2047  biasDesc.GetQuantizationOffset(),
2048  outputWidth,
2049  outputHeight);
2050  }
2051 
2052  LayerTestResult<T, 4> ret(outputTensorInfo);
2053  std::vector<T> outputImage = originalOutputImage;
2054  if (layout == armnn::DataLayout::NHWC)
2055  {
2056  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
2057  originalOutputImage.data(), outputImage.data(), sizeof(T));
2058  }
2059 
2060  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
2061 
2062  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2063  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2064 
2065  armnn::DepthwiseConvolution2dQueueDescriptor data;
2066  armnn::WorkloadInfo info;
2067  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2068  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2069 
2070  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2071  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2072 
2073  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2074  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2075 
2076  data.m_Weight = &weightsTensor;
2077  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
2078  data.m_Parameters.m_StrideX = 2;
2079  data.m_Parameters.m_StrideY = 1;
2080  data.m_Parameters.m_PadLeft = 0;
2081  data.m_Parameters.m_PadRight = 0;
2082  data.m_Parameters.m_PadTop = 1;
2083  data.m_Parameters.m_PadBottom = 1;
2084  data.m_Parameters.m_BiasEnabled = biasEnabled;
2085  data.m_Parameters.m_DataLayout = layout;
2086 
2087  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2088  inputHandle->Allocate();
2089  outputHandle->Allocate();
2090 
2091  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2092 
2093  ExecuteWorkload(*workload, memoryManager);
2094 
2095  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2096 
2097  return ret;
2098 }
2099 
2100 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2101  typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
2102 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
2103  armnn::IWorkloadFactory& workloadFactory,
2104  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2105  const boost::multi_array<T, 4>& originalInput,
2106  const boost::multi_array<T, 4>& originalKernel,
2107  const boost::multi_array<B, 1>& bias,
2108  const boost::multi_array<T, 4>& originalOutputExpected,
2109  float qScale,
2110  int32_t qOffset,
2111  const armnn::DataLayout layout,
2112  uint32_t padLeft = 0,
2113  uint32_t padTop = 0,
2114  uint32_t padRight = 0,
2115  uint32_t padBottom = 0,
2116  uint32_t strideX = 1,
2117  uint32_t strideY = 1,
2118  uint32_t dilationX = 1,
2119  uint32_t dilationY = 1)
2120 {
2121  unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
2122  unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
2123  unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
2124  unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
2125 
2126  unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
2127  unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
2128  unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
2129  unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
2130 
2131  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
2132  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
2133  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
2134  unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
2135 
2136  bool biasEnabled = bias.size() > 0;
2137 
2138  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
2139  ARMNN_ASSERT(inputNum == 1);
2140  ARMNN_ASSERT(outputNum == 1);
2141 
2142  // If a bias is used, its size must equal the number of output channels.
2143  ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
2144 
2145 
2146  // Note these tensors will use two (identical) batches.
2147  armnn::TensorInfo inputTensorInfo =
2148  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
2149  armnn::TensorInfo outputTensorInfo =
2150  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
2151 
2152  // For depthwise convolution the kernel is always supplied in NCHW layout, regardless of the input/output data layout.
2153  armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
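 // In this [M, C, H, W] kernel layout each of the C input channels is convolved with M filters
 // (the depth multiplier), so the layer produces M * C output channels.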
2154 
2155  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
2156 
2157  // Set quantization parameters if the requested type is a quantized type.
2158  if(armnn::IsQuantizedType<T>())
2159  {
2160  inputTensorInfo.SetQuantizationScale(qScale);
2161  inputTensorInfo.SetQuantizationOffset(qOffset);
2162  outputTensorInfo.SetQuantizationScale(qScale);
2163  outputTensorInfo.SetQuantizationOffset(qOffset);
2164  kernelDesc.SetQuantizationScale(qScale);
2165  kernelDesc.SetQuantizationOffset(qOffset);
2166  biasDesc.SetQuantizationScale(qScale*qScale);
2167  biasDesc.SetQuantizationOffset(0);
2168  }
2169 
2170  LayerTestResult<T, 4> ret(outputTensorInfo);
2171 
2172  // Construct input data
2173  std::vector<T> input;
2174  input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
2175  std::vector<T> inputData;
2176  inputData.insert(inputData.end(), input.begin(), input.end());
2177  inputData.insert(inputData.end(), input.begin(), input.end());
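 // The single reference batch is concatenated with itself so the workload is run on a 2-batch input,
 // matching the 2*inputNum shape used for inputTensorInfo above.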
2178 
2179  // At this point, permute the input data into NHWC if that layout was requested.
2180  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
2181  if (layout == armnn::DataLayout::NHWC)
2182  {
2183  std::vector<T> tmp(inputData.size());
2184  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
2185  inputData = tmp;
2186  }
2187 
2188  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
2189 
2190  std::vector<T> output;
2191  output.assign(originalOutputExpected.data(),
2192  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
2193 
2194  // Apply bias to output data if it is enabled.
2195  if(biasEnabled)
2196  {
2197  std::vector<T> biasV;
2198  biasV.assign(bias.data(), bias.data() + outputChannels);
2199  ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
2200  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
2201  outputWidth, outputHeight);
2202  }
2203 
2204  // Construct expected output data
2205  std::vector<T> outputData;
2206  outputData.insert(outputData.end(), output.begin(), output.end());
2207  outputData.insert(outputData.end(), output.begin(), output.end());
2208 
2209  // At this point, permute the expected output into NHWC if that layout was requested.
2210  if (layout == armnn::DataLayout::NHWC)
2211  {
2212  std::vector<T> tmp(outputData.size());
2213  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
2214  outputData = tmp;
2215  }
2216  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
2217 
2218  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2219  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2220 
2221  armnn::DepthwiseConvolution2dQueueDescriptor data;
2222  armnn::WorkloadInfo info;
2223  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2224  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2225 
2226  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2227  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2228 
2229  if(biasEnabled)
2230  {
2231  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2232  }
2233 
2234  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2235  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2236 
2237  data.m_Weight = &weightsTensor;
2238  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - leaving it unset can be a source of bugs.
2239  data.m_Parameters.m_StrideX = strideX;
2240  data.m_Parameters.m_StrideY = strideY;
2241  data.m_Parameters.m_PadLeft = padLeft;
2242  data.m_Parameters.m_PadRight = padRight;
2243  data.m_Parameters.m_PadTop = padTop;
2244  data.m_Parameters.m_PadBottom = padBottom;
2245  data.m_Parameters.m_BiasEnabled = biasEnabled;
2246  data.m_Parameters.m_DataLayout = layout;
2247  data.m_Parameters.m_DilationX = dilationX;
2248  data.m_Parameters.m_DilationY = dilationY;
2249 
2250  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2251  inputHandle->Allocate();
2252  outputHandle->Allocate();
2253 
2254  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2255 
2256  ExecuteWorkload(*workload, memoryManager);
2257 
2258  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2259 
2260  return ret;
2261 }
2262 
2263 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2264  typename T = armnn::ResolveType<ArmnnType>>
2266  armnn::IWorkloadFactory& workloadFactory,
2268  float qScale,
2269  int32_t qOffset,
2270  bool biasEnabled,
2271  const armnn::DataLayout layout)
2272 {
2273  // Use a single-batch 2-channel 5x5 image as input.
2274  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2275  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2276  QuantizedVector<T>({
2277  0, 1, 2, 3, 4,
2278  5, 6, 7, 8, 9,
2279  10, 11, 12, 13, 14,
2280  15, 16, 17, 18, 19,
2281  20, 21, 22, 23, 24,
2282 
2283  25, 26, 27, 28, 29,
2284  30, 31, 32, 33, 34,
2285  35, 36, 37, 38, 39,
2286  40, 41, 42, 43, 44,
2287  45, 46, 47, 48, 49
2288  },
2289  inputTensorInfo.GetQuantizationScale(),
2290  inputTensorInfo.GetQuantizationOffset())));
2291 
2292  // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2293  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2294  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2295  QuantizedVector<T>({
2296  32, 31, 30, 29,
2297  28, 27, 26, 25,
2298  24, 23, 22, 21,
2299  20, 19, 18, 17,
2300 
2301  16, 15, 14, 13,
2302  12, 11, 10, 9,
2303  8, 7, 6, 5,
2304  4, 3, 2, 1
2305  },
2306  kernelTensorInfo.GetQuantizationScale(),
2307  kernelTensorInfo.GetQuantizationOffset())));
2308 
2309  // Expected output is 1 batch of a 2-channel 5x5 image.
2310  // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2311  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2312  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2313  QuantizedVector<T>({
2314  1062, 1580, 1850, 1530, 1117,
2315  2140, 3108, 3500, 2842, 2042,
2316  3580, 5068, 5460, 4342, 3062,
2317  3618, 5072, 5390, 4248, 2971,
2318  3074, 4282, 4510, 3533, 2457,
2319 
2320  1550, 2284, 2362, 1955, 1428,
2321  2910, 4206, 4342, 3528, 2536,
2322  3390, 4886, 5022, 4068, 2916,
2323  3566, 5056, 5182, 4133, 2922,
2324  3100, 4352, 4452, 3517, 2465
2325  },
2326  outputTensorInfo.GetQuantizationScale(),
2327  outputTensorInfo.GetQuantizationOffset())));
2328 
2329  return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2330  workloadFactory,
2331  memoryManager,
2332  input,
2333  kernel,
2334  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2335  expectedOutput,
2336  qScale,
2337  qOffset,
2338  layout,
2339  1, // Padding left.
2340  1, // Padding top.
2341  2, // Padding right.
2342  2, // Padding bottom.
2343  1, // strideX
2344  1); // strideY
2345 }
2346 
2347 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2348  typename T = armnn::ResolveType<ArmnnType>>
2350  armnn::IWorkloadFactory& workloadFactory,
2352  float qScale,
2353  int32_t qOffset,
2354  bool biasEnabled)
2355 {
2356  auto layout = armnn::DataLayout::NHWC;
2357 
2358  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2359  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2360  QuantizedVector<T>({
2361  0, 1, 2, 3, 4,
2362  5, 6, 7, 8, 9,
2363  10, 11, 12, 13, 14,
2364  15, 16, 17, 18, 19,
2365  20, 21, 22, 23, 24,
2366 
2367  25, 26, 27, 28, 29,
2368  30, 31, 32, 33, 34,
2369  35, 36, 37, 38, 39,
2370  40, 41, 42, 43, 44,
2371  45, 46, 47, 48, 49
2372  },
2373  inputTensorInfo.GetQuantizationScale(),
2374  inputTensorInfo.GetQuantizationOffset())));
2375 
2376  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2377  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2378  QuantizedVector<T>({
2379  32, 31, 30, 29,
2380  28, 27, 26, 25,
2381  24, 23, 22, 21,
2382  20, 19, 18, 17,
2383 
2384  16, 15, 14, 13,
2385  12, 11, 10, 9,
2386  8, 7, 6, 5,
2387  4, 3, 2, 1
2388  },
2389  kernelTensorInfo.GetQuantizationScale(),
2390  kernelTensorInfo.GetQuantizationOffset())));
2391 
2392  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2393  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2394  QuantizedVector<T>({
2395  1062, 1580, 1850, 1530, 1117,
2396  2140, 3108, 3500, 2842, 2042,
2397  3580, 5068, 5460, 4342, 3062,
2398  3618, 5072, 5390, 4248, 2971,
2399  3074, 4282, 4510, 3533, 2457,
2400 
2401  1550, 2284, 2362, 1955, 1428,
2402  2910, 4206, 4342, 3528, 2536,
2403  3390, 4886, 5022, 4068, 2916,
2404  3566, 5056, 5182, 4133, 2922,
2405  3100, 4352, 4452, 3517, 2465
2406  },
2407  outputTensorInfo.GetQuantizationScale(),
2408  outputTensorInfo.GetQuantizationOffset())));
2409 
2410  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2411  workloadFactory,
2412  memoryManager,
2413  input,
2414  kernel,
2415  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2416  expectedOutput,
2417  qScale,
2418  qOffset,
2419  layout,
2420  1, // Padding left.
2421  1, // Padding top.
2422  2, // Padding right.
2423  2, // Padding bottom.
2424  1, // strideX
2425  1); // strideY
2426 }
2427 
2428 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2429  typename T = armnn::ResolveType<ArmnnType>>
2431  armnn::IWorkloadFactory& workloadFactory,
2433  float qScale,
2434  int32_t qOffset,
2435  bool biasEnabled)
2436 {
2437  auto layout = armnn::DataLayout::NHWC;
2438 
2439  armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2440  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2441  QuantizedVector<T>({
2442  0, 0, 0, 0, 0, 0, 0, 0, 0,
2443  0, 0, 0, 0, 0, 0, 0, 0, 0,
2444  0, 0, 0, 0, 0, 0, 0, 0, 0,
2445  0, 0, 0, 1, 1, 1, 0, 0, 0,
2446  0, 0, 0, 1, 1, 1, 0, 0, 0,
2447  0, 0, 0, 1, 1, 1, 0, 0, 0,
2448  0, 0, 0, 0, 0, 0, 0, 0, 0,
2449  0, 0, 0, 0, 0, 0, 0, 0, 0,
2450  0, 0, 0, 0, 0, 0, 0, 0, 0
2451  },
2452  inputTensorInfo.GetQuantizationScale(),
2453  inputTensorInfo.GetQuantizationOffset())));
2454 
2455  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2456  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2457  QuantizedVector<T>({
2458  1, 2, 3,
2459  4, 5, 6,
2460  7, 8, 9
2461  },
2462  kernelTensorInfo.GetQuantizationScale(),
2463  kernelTensorInfo.GetQuantizationOffset())));
2464 
2465  uint32_t padLeft = 0;
2466  uint32_t padTop = 0;
2467  uint32_t padRight = 0;
2468  uint32_t padBottom = 0;
2469  uint32_t strideX = 1;
2470  uint32_t strideY = 1;
2471  uint32_t dilationX = 3;
2472  uint32_t dilationY = 3;
2473 
2474  // Since the dilation rate is 3, the 3x3 kernel acts as a 7x7 kernel, so the output shrinks from 9x9 to 3x3 with every element equal to 5.
2475  armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2476  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2477  QuantizedVector<T>({
2478  5, 5, 5,
2479  5, 5, 5,
2480  5, 5, 5
2481  },
2482  outputTensorInfo.GetQuantizationScale(),
2483  outputTensorInfo.GetQuantizationOffset())));
2484 
2485  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2486  workloadFactory,
2487  memoryManager,
2488  input,
2489  kernel,
2490  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2491  expectedOutput,
2492  qScale,
2493  qOffset,
2494  layout,
2495  padLeft,
2496  padTop,
2497  padRight,
2498  padBottom,
2499  strideX,
2500  strideY,
2501  dilationX,
2502  dilationY);
2503 }
2504 
2505 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2507  armnn::IWorkloadFactory& workloadFactory,
2509  const std::vector<float>& inputNoQuantizedValues,
2510  armnn::TensorInfo& inputTensorInfo,
2511  const std::vector<float>& kernelNoQuantizedValues,
2512  armnn::TensorInfo& kernelTensorInfo,
2513  const std::vector<float>& outputExpectedNoQuantizedValues,
2514  armnn::TensorInfo& outputTensorInfo,
2515  uint32_t dilationX,
2516  uint32_t dilationY,
2518  bool biasEnabled = false)
2519 {
2520  float qScale;
2521  int32_t qOffset;
2522  switch (ArmnnType)
2523  {
2526  {
2527  qScale = 0.1f;
2528  qOffset = 128;
2529  break;
2530  }
2532  {
2533  qScale = 0.1f;
2534  qOffset = 0;
2535  break;
2536  }
2538  default:
2539  {
2540  qScale = 0.f;
2541  qOffset = 0;
2542  break;
2543  }
2544  }
2545 
2546  inputTensorInfo.SetQuantizationScale(qScale);
2547  inputTensorInfo.SetQuantizationOffset(qOffset);
2548  kernelTensorInfo.SetQuantizationScale(qScale);
2549  kernelTensorInfo.SetQuantizationOffset(qOffset);
2550  outputTensorInfo.SetQuantizationScale(qScale);
2551  outputTensorInfo.SetQuantizationOffset(qOffset);
2552 
2553  auto input = MakeTensor<T, 4>(inputTensorInfo,
2554  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2555  inputTensorInfo.GetQuantizationScale(),
2556  inputTensorInfo.GetQuantizationOffset())));
2557  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2558  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2559  kernelTensorInfo.GetQuantizationScale(),
2560  kernelTensorInfo.GetQuantizationOffset())));
2561  auto expectedOutput =
2562  MakeTensor<T, 4>(outputTensorInfo,
2563  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2564  outputTensorInfo.GetQuantizationScale(),
2565  outputTensorInfo.GetQuantizationOffset())));
2566 
2567  uint32_t padLeft = 0;
2568  uint32_t padTop = 0;
2569  uint32_t padRight = 0;
2570  uint32_t padBottom = 0;
2571  uint32_t strideX = 1;
2572  uint32_t strideY = 1;
2573 
2574  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2575  workloadFactory,
2576  memoryManager,
2577  input,
2578  kernel,
2579  GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2580  expectedOutput,
2581  qScale,
2582  qOffset,
2583  layout,
2584  padLeft,
2585  padTop,
2586  padRight,
2587  padBottom,
2588  strideX,
2589  strideY,
2590  dilationX,
2591  dilationY);
2592 }
2593 
2594 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2596  armnn::IWorkloadFactory& workloadFactory,
2598  bool biasEnabled,
2599  const armnn::DataLayout layout)
2600 {
2601  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2602  std::vector<float> inputNoQuantizedValues =
2603  {
2604  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2605  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2606  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2607  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2608  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2609  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2610  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2611  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2612  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2613  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2614  };
2615 
2616  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2617  std::vector<float> kernelNoQuantizedValues =
2618  {
2619  1, 2, 3,
2620  4, 5, 6,
2621  7, 8, 9
2622  };
2623 
2624  // With a dilation rate of 3 the 3x3 kernel has an effective size of 7x7 (K_eff = K + (K - 1) * (D - 1)),
2625  // so the output is 4x4: (I - K_eff + 2P) / S + 1 = (10 - 7 + 0) / 1 + 1 = 4.
2626  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2627  std::vector<float> outputExpectedNoQuantizedValues =
2628  {
2629  6., 5., 5., 5.,
2630  6., 5., 5., 5.,
2631  6., 5., 5., 5.,
2632  3., 2., 2., 2.
2633  };
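 // Worked example for the top-left output value: with dilation 3 the kernel taps land on input rows
 // {0, 3, 6} and columns {0, 3, 6}; only the tap at input (3, 6) falls inside the block of ones and
 // it carries kernel weight 6, hence the expected value 6.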
2634 
2635  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2636  workloadFactory,
2637  memoryManager,
2638  inputNoQuantizedValues,
2639  inputTensorInfo,
2640  kernelNoQuantizedValues,
2641  kernelTensorInfo,
2642  outputExpectedNoQuantizedValues,
2643  outputTensorInfo,
2644  3,
2645  3,
2646  layout,
2647  biasEnabled);
2648 }
2649 
2650 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2652  armnn::IWorkloadFactory& workloadFactory,
2654  bool biasEnabled,
2655  const armnn::DataLayout layout)
2656 {
2657  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2658  std::vector<float> inputNoQuantizedValues =
2659  {
2660  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2661  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2662  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2663  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2664  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2665  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2666  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2667  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2668  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2669  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2670 
2671  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2672  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2673  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2674  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2675  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2676  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2677  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2678  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2679  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2680  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2681  };
2682 
2683  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2684  std::vector<float> kernelNoQuantizedValues =
2685  {
2686  1, 2, 3,
2687  4, 5, 6,
2688  7, 8, 9,
2689 
2690  1, 2, 3,
2691  4, 5, 6,
2692  7, 8, 9
2693  };
2694 
2695  // With a dilation rate of 3 the 3x3 kernel has an effective size of 7x7 (K_eff = K + (K - 1) * (D - 1)),
2696  // so each of the 2 channels produces a 4x4 output: (I - K_eff + 2P) / S + 1 = (10 - 7 + 0) / 1 + 1 = 4.
2697  armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2698  std::vector<float> outputExpectedNoQuantizedValues =
2699  {
2700  6., 5., 5., 5.,
2701  6., 5., 5., 5.,
2702  6., 5., 5., 5.,
2703  3., 2., 2., 2.,
2704 
2705  6., 5., 5., 5.,
2706  6., 5., 5., 5.,
2707  6., 5., 5., 5.,
2708  3., 2., 2., 2.
2709  };
2710 
2711  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2712  workloadFactory,
2713  memoryManager,
2714  inputNoQuantizedValues,
2715  inputTensorInfo,
2716  kernelNoQuantizedValues,
2717  kernelTensorInfo,
2718  outputExpectedNoQuantizedValues,
2719  outputTensorInfo,
2720  3,
2721  3,
2722  layout,
2723  biasEnabled);
2724 }
2725 
2726 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2728  armnn::IWorkloadFactory& workloadFactory,
2730  bool biasEnabled,
2731  const armnn::DataLayout layout)
2732 {
2733  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2734  std::vector<float> inputNoQuantizedValues =
2735  {
2736  10.0, 10.0, 10.0,
2737  10.0, 10.0, 10.0,
2738  10.0, 10.0, 10.0,
2739 
2740  21.0, 22.0, 23.0,
2741  24.0, 25.0, 26.0,
2742  27.0, 28.0, 29.0
2743  };
2744 
2745  armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
2746 
2747  std::vector<float> kernelNoQuantizedValues =
2748  {
2749  0.25f, 0.25f,
2750  0.25f, 0.25f,
2751 
2752  0.25f, 0.25f,
2753  0.25f, 0.25f,
2754 
2755  0.0f , 0.0f,
2756  0.0f , 0.1f,
2757 
2758  0.0f , 0.0f,
2759  0.0f , 0.1f,
2760 
2761  0.2f , 0.0f,
2762  0.0f , 0.0f,
2763 
2764  0.2f , 0.0f,
2765  0.0f , 0.0f,
2766 
2767  0.0f , 0.3f,
2768  0.0f , 0.0f,
2769 
2770  0.0f , 0.3f,
2771  0.0f , 0.0f
2772  };
2773 
2774  armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2775  std::vector<float> outputExpectedNoQuantizedValues =
2776  {
2777  10.f, 10.f,
2778  10.f, 10.f,
2779 
2780  1.f, 1.f,
2781  1.f, 1.f,
2782 
2783  2.f, 2.f,
2784  2.f, 2.f,
2785 
2786  3.f, 3.f,
2787  3.f, 3.f,
2788 
2789  23.f, 24.f,
2790  26.f, 27.f,
2791 
2792  2.5f, 2.6000001f,
2793  2.8f, 2.9f,
2794 
2795  4.2000003f, 4.4f,
2796  4.8f, 5.f,
2797 
2798  6.6000004f, 6.9f,
2799  7.5000005f, 7.8f
2800  };
2801 
2802 
2803  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2804  workloadFactory,
2805  memoryManager,
2806  inputNoQuantizedValues,
2807  inputTensorInfo,
2808  kernelNoQuantizedValues,
2809  kernelTensorInfo,
2810  outputExpectedNoQuantizedValues,
2811  outputTensorInfo,
2812  1,
2813  1,
2814  layout,
2815  biasEnabled);
2816 }
2817 
2818 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2820  armnn::IWorkloadFactory& workloadFactory,
2822  bool biasEnabled,
2823  const armnn::DataLayout layout)
2824 {
2825  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2826  std::vector<float> inputNoQuantizedValues =
2827  {
2828  10.0, 10.0, 10.0,
2829  10.0, 10.0, 10.0,
2830  10.0, 10.0, 10.0,
2831 
2832  21.0, 22.0, 23.0,
2833  24.0, 25.0, 26.0,
2834  27.0, 28.0, 29.0
2835  };
2836 
2837  armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2838 
2839  std::vector<float> kernelNoQuantizedValues =
2840  {
2841  0.25f, 0.25f,
2842  0.25f, 0.25f,
2843 
2844  0.2f , 0.0f,
2845  0.0f , 0.0f,
2846 
2847  0.0f , 0.0f,
2848  0.0f , 0.1f,
2849 
2850  0.0f , 0.3f,
2851  0.0f , 0.0f
2852 
2853  };
2854 
2855  armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2856  std::vector<float> outputExpectedNoQuantizedValues =
2857  {
2858  10.f, 10.f,
2859  10.f, 10.f,
2860 
2861  1.f, 1.f,
2862  1.f, 1.f,
2863 
2864  4.2000003f, 4.4f,
2865  4.8f, 5.f,
2866 
2867  6.6000004f, 6.9f,
2868  7.5000005f, 7.8f
2869  };
2870 
2871 
2872  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2873  workloadFactory,
2874  memoryManager,
2875  inputNoQuantizedValues,
2876  inputTensorInfo,
2877  kernelNoQuantizedValues,
2878  kernelTensorInfo,
2879  outputExpectedNoQuantizedValues,
2880  outputTensorInfo,
2881  1,
2882  1,
2883  layout,
2884  biasEnabled);
2885 }
2886 
2887 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2889  armnn::IWorkloadFactory& workloadFactory,
2891  armnn::IWorkloadFactory& refWorkloadFactory,
2892  const armnnUtils::DataLayoutIndexed& layout)
2893 {
2894  unsigned int inputHeight = 8;
2895  unsigned int inputWidth = 16;
2896  unsigned int inputChannels = 3;
2897  unsigned int inputNum = 5;
2898 
2899  unsigned int kernelHeight = 3;
2900  unsigned int kernelWidth = 3;
2901  unsigned int channelMultiplier = 1;
2902 
2903  unsigned int strideX = 2;
2904  unsigned int strideY = 3;
2905  unsigned int padX = 1;
2906  unsigned int padY = 1;
2907 
2908  unsigned int outputNum = inputNum;
2909  unsigned int outputChannels = inputChannels * channelMultiplier;
2910  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2911  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
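 // For the sizes above: outputHeight = (8 + 2*1 - 3 + 3) / 3 = 3 and outputWidth = (16 + 2*1 - 3 + 2) / 2 = 8
 // (integer division), i.e. the usual floor((I + 2P - K) / S) + 1 output-size formula rearranged.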
2912 
2913  armnn::TensorInfo inputTensorInfo;
2914  armnn::TensorInfo outputTensorInfo;
2915  armnn::TensorInfo kernelDesc;
2916  armnn::TensorInfo biasDesc;
2917 
2918 
2919  std::vector<unsigned int> inputShape;
2920  std::vector<unsigned int> outputShape;
2921  std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2922  std::vector<unsigned int> biasShape{ outputChannels };
2923  switch (layout.GetDataLayout())
2924  {
2925  case armnn::DataLayout::NCHW:
2926  inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2927  outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2928  break;
2929  case armnn::DataLayout::NHWC:
2930  inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2931  outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2932  break;
2933  default:
2934  throw armnn::InvalidArgumentException("unknown data layout ["
2935  + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2936  }
2937 
2938  float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2939  float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2940  int32_t qOffset = 0;
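 // For the quantized case the output scale (2.0) differs from the input scale (1.0), so the comparison
 // against the reference workload also covers requantising the accumulator to the output scale.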
2941 
2942  inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2943  outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2944  kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2945  biasDesc = armnn::TensorInfo(
2946  1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2947 
2948  LayerTestResult<T, 4> ret(outputTensorInfo);
2949 
2950  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2951  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2952  auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2953  biasDesc, 1028, 0.0f, 255.0f);
2954 
2955  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2956  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2957 
2958  armnn::DepthwiseConvolution2dQueueDescriptor data;
2959  armnn::WorkloadInfo info;
2960  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2961  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2962 
2963  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2964  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2965 
2966  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2967  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2968  data.m_Weight = &weightsTensor;
2969  data.m_Bias = &biasTensor;
2970  data.m_Parameters.m_StrideX = strideX;
2971  data.m_Parameters.m_StrideY = strideY;
2972  data.m_Parameters.m_PadLeft = padX;
2973  data.m_Parameters.m_PadRight = padX;
2974  data.m_Parameters.m_PadTop = padY;
2975  data.m_Parameters.m_PadBottom = padY;
2976  data.m_Parameters.m_BiasEnabled = true;
2977  data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2978 
2979  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2980  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2981 
2982  armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
2983  armnn::WorkloadInfo refInfo = info;
2984  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2985  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2986 
2987  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2988  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2989 
2990  outputHandleRef->Allocate();
2991  inputHandleRef->Allocate();
2992 
2993  inputHandle->Allocate();
2994  outputHandle->Allocate();
2995 
2996  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2997  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2998 
2999  ExecuteWorkload(*workload, memoryManager);
3000 
3001  workloadRef->PostAllocationConfigure();
3002  workloadRef->Execute();
3003 
3004  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
3005  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
3006 
3007  return ret;
3008 }
3009 
3010 //
3011 // Explicit template specializations
3012 //
3013 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3014 Convolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3015  armnn::IWorkloadFactory&,
3016  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3017  bool,
3018  const armnn::DataLayout);
3019
3020 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3021 Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3022  armnn::IWorkloadFactory&,
3023  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3024  bool,
3025  const armnn::DataLayout);
3026
3027 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3028 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3029  armnn::IWorkloadFactory&,
3030  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3031  bool,
3032  const armnn::DataLayout);
3033
3034 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3035 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3036  armnn::IWorkloadFactory&,
3037  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3038  bool,
3039  const armnn::DataLayout);
3040
3041 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3042 Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3043  armnn::IWorkloadFactory&,
3044  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3045  bool,
3046  const armnn::DataLayout);
3047 
3048 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3049 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3050  armnn::IWorkloadFactory&,
3051  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3052  bool,
3053  const armnn::DataLayout);
3054
3055 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3056 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3057  armnn::IWorkloadFactory&,
3058  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3059  bool,
3060  const armnn::DataLayout);
3061
3062 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3063 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3064  armnn::IWorkloadFactory&,
3065  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3066  bool,
3067  const armnn::DataLayout);
3068
3069 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3070 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3071  armnn::IWorkloadFactory&,
3072  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3073  bool,
3074  const armnn::DataLayout);
3075
3076 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3077 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3078  armnn::IWorkloadFactory&,
3079  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3080  bool,
3081  const armnn::DataLayout);
3082 
3083 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3084 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3085  armnn::IWorkloadFactory &workloadFactory,
3086  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3087  bool biasEnabled,
3088  const armnn::DataLayout layout);
3089 
3090 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3091 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3092  armnn::IWorkloadFactory &workloadFactory,
3093  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3094  bool biasEnabled,
3095  const armnn::DataLayout layout);
3096 
3097 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3098 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3099  armnn::IWorkloadFactory &workloadFactory,
3100  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3101  bool biasEnabled,
3102  const armnn::DataLayout layout);
3103 
3104 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3105 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3106  armnn::IWorkloadFactory &workloadFactory,
3107  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3108  bool biasEnabled,
3109  const armnn::DataLayout layout);
3110 
3111 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3112 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3113  armnn::IWorkloadFactory &workloadFactory,
3114  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3115  bool biasEnabled,
3116  const armnn::DataLayout layout);
3117 
3118 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3119 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3120  armnn::IWorkloadFactory&,
3121  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3122  bool,
3123  const armnn::DataLayout);
3124 
3125 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3126 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3127  armnn::IWorkloadFactory&,
3128  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3129  bool,
3130  const armnn::DataLayout);
3131 
3132 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3133 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3134  armnn::IWorkloadFactory&,
3135  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3136  bool,
3137  const armnn::DataLayout);
3138 
3139 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3140 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3141  armnn::IWorkloadFactory&,
3142  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3143  bool,
3144  const armnn::DataLayout);
3145 
3146 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3147 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3148  armnn::IWorkloadFactory&,
3149  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3150  bool,
3151  const armnn::DataLayout);
3152 
3153 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3154 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3155  armnn::IWorkloadFactory&,
3156  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3157  bool,
3158  const armnn::DataLayout);
3159 
3160 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3161 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3162  armnn::IWorkloadFactory&,
3163  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3164  bool,
3165  const armnn::DataLayout);
3166 
3167 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3168 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3169  armnn::IWorkloadFactory&,
3170  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3171  bool,
3172  const armnn::DataLayout);
3173 
3174 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3175 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3176  armnn::IWorkloadFactory&,
3177  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3178  bool,
3179  const armnn::DataLayout);
3180 
3181 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3182 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3183  armnn::IWorkloadFactory&,
3184  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3185  bool,
3186  const armnn::DataLayout);
3187 
3188 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3189 DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3190  armnn::IWorkloadFactory &workloadFactory,
3191  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3192  bool biasEnabled,
3193  const armnn::DataLayout layout);
3194 
3195 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3196 DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3197  armnn::IWorkloadFactory &workloadFactory,
3198  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3199  bool biasEnabled,
3200  const armnn::DataLayout layout);
3201 
3202 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3203 DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3204  armnn::IWorkloadFactory &workloadFactory,
3205  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3206  bool biasEnabled,
3207  const armnn::DataLayout layout);
3208 
3209 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3210 DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3211  armnn::IWorkloadFactory &workloadFactory,
3212  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3213  bool biasEnabled,
3214  const armnn::DataLayout layout);
3215 
3216 //
3217 // Implementation functions
3218 //
3219 
3221  armnn::IWorkloadFactory& workloadFactory,
3222  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3223  bool biasEnabled,
3224  const armnn::DataLayout layout)
3225 {
3226  return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3227  workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
3228 }
3229 
3231  armnn::IWorkloadFactory& workloadFactory,
3232  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3233  bool biasEnabled,
3234  const armnn::DataLayout layout)
3235 {
3236  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3237  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3238 }
3239 
3241  armnn::IWorkloadFactory& workloadFactory,
3242  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3243  bool biasEnabled,
3244  const armnn::DataLayout layout)
3245 {
3246  return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3247  workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
3248 }
3249 
3250 LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
3251  armnn::IWorkloadFactory& workloadFactory,
3252  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3253  bool biasEnabled)
3254 {
3255  return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
3256  workloadFactory,
3257  memoryManager,
3258  0.f,
3259  0,
3260  biasEnabled,
3261  armnn::DataLayout::NHWC);
3262 }
3263 
3264 LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
3265  armnn::IWorkloadFactory& workloadFactory,
3266  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3267  bool biasEnabled,
3268  const armnn::DataLayout layout)
3269 {
3270  return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
3271  workloadFactory,
3272  memoryManager,
3273  0.f,
3274  0,
3275  biasEnabled,
3276  layout);
3277 }
3278 
3279 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
3280  armnn::IWorkloadFactory& workloadFactory,
3281  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3282  bool biasEnabled,
3283  const armnn::DataLayout layout)
3284 {
3285  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3286  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3287 }
3288 
3289 LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
3290  armnn::IWorkloadFactory& workloadFactory,
3291  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3292  bool biasEnabled,
3293  const armnn::DataLayout layout)
3294 {
3295  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3296  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3297 }
3298 
3299 LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
3300  armnn::IWorkloadFactory& workloadFactory,
3301  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3302  bool biasEnabled,
3303  const armnn::DataLayout layout)
3304 {
3305  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3306  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3307 }
3308 
3309 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
3310  armnn::IWorkloadFactory& workloadFactory,
3311  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3312  armnn::DataLayout layout)
3313 {
3314  return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3315  workloadFactory, memoryManager, layout, 0.0f, 0);
3316 }
3317 
3318 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
3319  armnn::IWorkloadFactory& workloadFactory,
3320  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3321  armnn::DataLayout layout)
3322 {
3323  return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
3324  <armnn::DataType::Float32, armnn::DataType::Float32>(
3325  workloadFactory, memoryManager, layout, 0.0f, 0);
3326 }
3327 
3328 LayerTestResult<float, 4> Convolution1dTest(
3329  armnn::IWorkloadFactory& workloadFactory,
3330  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3331  bool biasEnabled)
3332 {
3333  return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3334  workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
3335 }
3336 
3337 LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
3338  armnn::IWorkloadFactory& workloadFactory,
3339  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3340  bool biasEnabled)
3341 {
3342  return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3343  workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
3344 }
3345 
3346 LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
3347  armnn::IWorkloadFactory& workloadFactory,
3348  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3349  const armnn::DataLayout layout)
3350 {
3351  using namespace armnn;
3352 
3353  const DataType inputType = DataType::QAsymmU8;
3354  const DataType kernelType = DataType::QSymmS8;
3355  const DataType biasType = DataType::Signed32;
3356 
3357  TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
3358  TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
3359 
3360  const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
3361  constexpr unsigned int quantDimension = 0;
3362 
3363  TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
3364 
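 // The per-channel bias scales below are the input scale (0.5) multiplied element-wise by the
 // per-output-channel weight scales {0.5, 0.75, 1.0}, giving {0.25, 0.375, 0.5}.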
3365  const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
3366  TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
3367 
3368  std::vector<uint8_t> inputData =
3369  {
3370  138, 108, 138, 108, 138, 108
3371  };
3372 
3373  std::vector<int8_t> kernelData =
3374  {
3375  1, 2, 1, 2, 1, 2
3376  };
3377 
3378  std::vector<int32_t> biasData =
3379  {
3380  4, 4, 4
3381  };
3382 
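 // Expected values: each input position dequantizes to {5, -10}, so the per-output-channel
 // accumulators (dequantized weights plus dequantized bias) are 5*0.5 - 10*1.0 + 1 = -6.5,
 // 5*0.75 - 10*1.5 + 1.5 = -9.75 and 5*1.0 - 10*2.0 + 2 = -13. With output scale 1.0 and
 // offset 128 these correspond (~128 - 6.5, 128 - 9.75, 128 - 13) to the 121, 118, 115 pattern below.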
3383  std::vector<uint8_t> expectedOutputData =
3384  {
3385  121, 118, 115, 121, 118, 115, 121, 118, 115
3386  };
3387 
3388  if (layout == DataLayout::NCHW)
3389  {
3390  PermuteTensorNhwcToNchw(inputInfo, inputData);
3391  PermuteTensorNhwcToNchw(kernelInfo, kernelData);
3392  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3393  }
3394 
3395  Convolution2dDescriptor descriptor;
3396  descriptor.m_StrideX = 1;
3397  descriptor.m_StrideY = 1;
3398  descriptor.m_PadLeft = 0;
3399  descriptor.m_PadRight = 0;
3400  descriptor.m_PadTop = 0;
3401  descriptor.m_PadBottom = 0;
3402  descriptor.m_BiasEnabled = true;
3403  descriptor.m_DataLayout = layout;
3404 
3405  std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
3406  std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
3407 
3408  WorkloadInfo workloadInfo;
3409  ScopedCpuTensorHandle weightTensor(kernelInfo);
3410  ScopedCpuTensorHandle biasTensor(biasInfo);
3411 
3412  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3413  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3414 
3415  Convolution2dQueueDescriptor queueDescriptor;
3416  queueDescriptor.m_Parameters = descriptor;
3417  queueDescriptor.m_Weight = &weightTensor;
3418  queueDescriptor.m_Bias = &biasTensor;
3419 
3420  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3421  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3422 
3423  std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
3424  inputHandle->Allocate();
3425  outputHandle->Allocate();
3426 
3427  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3428 
3429  ExecuteWorkload(*workload, memoryManager);
3430 
3431  LayerTestResult<uint8_t, 4> ret(outputInfo);
3432  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3433  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3434 
3435  return ret;
3436 }
3437 
3438 LayerTestResult<float, 4> CompareConvolution2dTest(
3439  armnn::IWorkloadFactory& workloadFactory,
3440  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3441  armnn::IWorkloadFactory& refWorkloadFactory)
3442 {
3443  return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
3444  workloadFactory, memoryManager, refWorkloadFactory);
3445 }
3446 
3447 LayerTestResult<float, 4> DepthwiseConvolution2dTest(
3448  armnn::IWorkloadFactory& workloadFactory,
3449  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3450  bool biasEnabled,
3451  const armnn::DataLayout layout)
3452 {
3453  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3454  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3455 }
3456 
3457 LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
3458  armnn::IWorkloadFactory& workloadFactory,
3459  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3460  bool biasEnabled)
3461 {
3462  return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3463  workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
3464 }
3465 
3466 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
3467  armnn::IWorkloadFactory& workloadFactory,
3468  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3469  bool biasEnabled,
3470  const armnn::DataLayout layout)
3471 {
3472  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3473  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3474 }
3475 
3476 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
3477  armnn::IWorkloadFactory& workloadFactory,
3478  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3479 {
3480  armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
3481  auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
3482 
3483  std::vector<float> kernelData;
3484  std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
3485  for (unsigned int i = 0; i < 64; ++i)
3486  {
3487  kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
3488  }
3489  armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
3490  auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
3491 
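 // Every one of the 64 output channels applies the same {1, -1, -1, 1} kernel to the
 // {1, 2, 3, 4} input, so each accumulator is 1 - 2 - 3 + 4 = 0 and the expected output is
 // all zeros.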
3492  std::vector<float> expectedOutputData(64, 0.f);
3493  armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
3494  auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
3495 
3496  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3497  workloadFactory,
3498  memoryManager,
3499  input,
3500  kernel,
3501  boost::multi_array<float, 1>(),
3502  expectedOutput,
3503  0.f,
3504  0,
3505  armnn::DataLayout::NCHW);
3506 }
3507 
3508 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
3509  armnn::IWorkloadFactory& workloadFactory,
3510  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3511  bool biasEnabled,
3512  const armnn::DataLayout layout)
3513 {
3514  return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3515  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3516 }
3517 
3518 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
3519  armnn::IWorkloadFactory& workloadFactory,
3520  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3521  bool biasEnabled,
3522  const armnn::DataLayout layout)
3523 {
3524  return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3525  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3526 }
3527 
3528 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
3529  armnn::IWorkloadFactory& workloadFactory,
3530  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3531  bool biasEnabled,
3532  const armnn::DataLayout layout)
3533 {
3534  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3535  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3536 }
3537 
3538 LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
3539  armnn::IWorkloadFactory& workloadFactory,
3540  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3541 {
3542  return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3543  workloadFactory,
3544  memoryManager,
3545  0.f,
3546  0,
3547  false);
3548 }
3549 
3550 LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
3551  armnn::IWorkloadFactory& workloadFactory,
3552  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3553  bool biasEnabled,
3554  const armnn::DataLayout layout)
3555 {
3556  return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3557  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3558 }
3559 
3560 LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
3561  armnn::IWorkloadFactory& workloadFactory,
3562  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3563  bool biasEnabled,
3564  const armnn::DataLayout layout)
3565 {
3566  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3567  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3568 }
3569 
3570 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
3571  armnn::IWorkloadFactory& workloadFactory,
3572  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3573  const armnn::DataLayout layout)
3574 {
3575  using namespace armnn;
3576 
3577  const DataType inputType = DataType::QAsymmU8;
3578  const DataType kernelType = DataType::QSymmS8;
3579  const DataType biasType = DataType::Signed32;
3580 
3581  TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
3582  TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
3583 
3584  const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
3585  const unsigned int quantDimension = 0;
3586  TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
3587 
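 // The per-channel bias scales below are the input scale (0.5) multiplied element-wise by the
 // per-output-channel weight scales {1.0, 0.5, 1.0, 0.5}, giving {0.5, 0.25, 0.5, 0.25}.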
3588  const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
3589  constexpr unsigned int biasQuantDimension = 0;
3590  TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
3591 
3592  std::vector<uint8_t> inputData =
3593  {
3594  129, 130,
3595  129, 130,
3596  129, 130,
3597  129, 130,
3598  129, 130,
3599  129, 130,
3600  129, 130,
3601  129, 130,
3602  129, 130
3603  };
3604 
3605  std::vector<int8_t> kernelData =
3606  {
3607  1, 1, 1, 1,
3608  1, 1, 1, 1,
3609  1, 1, 1, 1,
3610  1, 1, 1, 1
3611  };
3612 
3613  std::vector<int32_t> biasData =
3614  {
3615  4, 4, 4, 4
3616  };
3617 
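 // Expected values: the 2x2 input windows sum to 2.0 for input channel 0 and 4.0 for input
 // channel 1, i.e. {2, 2, 4, 4} across the four output channels; multiplying by the weight
 // scales {1.0, 0.5, 1.0, 0.5} and adding the dequantized biases {2, 1, 2, 1} gives {4, 2, 6, 3},
 // which is {132, 130, 134, 131} at output scale 1.0, offset 128.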
3618  std::vector<uint8_t> expectedOutputData =
3619  {
3620  132, 130, 134, 131,
3621  132, 130, 134, 131,
3622  132, 130, 134, 131,
3623  132, 130, 134, 131
3624  };
3625 
3626  if (layout == DataLayout::NCHW)
3627  {
3628  PermuteTensorNhwcToNchw(inputInfo, inputData);
3629  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3630  }
3631 
3632  DepthwiseConvolution2dDescriptor descriptor;
3633  descriptor.m_StrideX = 1;
3634  descriptor.m_StrideY = 1;
3635  descriptor.m_PadLeft = 0;
3636  descriptor.m_PadRight = 0;
3637  descriptor.m_PadTop = 0;
3638  descriptor.m_PadBottom = 0;
3639  descriptor.m_DilationX = 1;
3640  descriptor.m_DilationY = 1;
3641  descriptor.m_BiasEnabled = true;
3642  descriptor.m_DataLayout = layout;
3643 
3644  std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
3645  std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
3646 
3647  WorkloadInfo workloadInfo;
3648  ScopedCpuTensorHandle weightTensor(kernelInfo);
3649  ScopedCpuTensorHandle biasTensor(biasInfo);
3650 
3651  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3652  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3653 
3654  DepthwiseConvolution2dQueueDescriptor queueDescriptor;
3655  queueDescriptor.m_Parameters = descriptor;
3656  queueDescriptor.m_Weight = &weightTensor;
3657  queueDescriptor.m_Bias = &biasTensor;
3658 
3659  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3660  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3661 
3662  std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
3663  inputHandle->Allocate();
3664  outputHandle->Allocate();
3665 
3666  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3667 
3668  ExecuteWorkload(*workload, memoryManager);
3669 
3670  LayerTestResult<uint8_t, 4> ret(outputInfo);
3671 
3672  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3673  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3674 
3675  return ret;
3676 }
3677 
3678 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
3679  armnn::IWorkloadFactory& workloadFactory,
3680  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3681  armnn::IWorkloadFactory& refWorkloadFactory,
3682  const armnn::DataLayout layout)
3683 {
3684  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
3685  workloadFactory, memoryManager, refWorkloadFactory, layout);
3686 }
3687 
3688 LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
3689  armnn::IWorkloadFactory& workloadFactory,
3690  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3691  armnn::IWorkloadFactory& refWorkloadFactory,
3692  const armnn::DataLayout layout)
3693 {
3694  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
3695  workloadFactory, memoryManager, refWorkloadFactory, layout);
3696 }