Conv2dTestImpl.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "Conv2dTestImpl.hpp"
7 
8 #include <QuantizeHelper.hpp>
9 #include <armnnUtils/TensorUtils.hpp> // reconstructed; this and the includes at 12, 15 and 17-19 appeared only as hyperlinks in the rendered page
10 
11 
12 #include <armnnUtils/DataLayoutIndexed.hpp>
13 #include <armnnUtils/Permute.hpp>
14 
15 #include <backendsCommon/CpuTensorHandle.hpp>
16 
17 #include <backendsCommon/test/DataLayoutUtils.hpp>
18 #include <backendsCommon/test/TensorCopyUtils.hpp>
19 #include <backendsCommon/test/WorkloadTestUtils.hpp>
20 
21 #include <test/TensorHelpers.hpp>
22 
23 #include <boost/numeric/conversion/cast.hpp>
24 
25 #include <string>
26 
27 //
28 // Static data
29 //
30 
31 // 2-channel bias used by a number of Conv2d tests.
32 static std::vector<float> Bias2({0, 2});
33 
34 static std::vector<float> Bias4({1, 2, 3, 4});
35 
36 static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
37 
38 // 3-channel 16x8 image used as common input data for a number of Conv2d tests.
39 static std::vector<float> ConvInput3x8x16({
40  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
41  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
42  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
43  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
44  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
45  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
46  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
47  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
48  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
57  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
58  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
64 });
65 
66 using namespace armnnUtils;
67 
68 //
69 // Helper templates
70 //
71 
72 // Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
73 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
74 boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
75 {
76  if(biasEnabled)
77  {
78  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
79  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
80  return bias;
81  }
82  else
83  {
84  return boost::multi_array<T, 1>();
85  }
86 }
87 
88 // Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
89 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
90 boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
91 {
92  if(biasEnabled)
93  {
94  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
95  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
96  return bias;
97  }
98  else
99  {
100  return boost::multi_array<T, 1>();
101  }
102 }
103 
104 // Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
105 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
106 boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
107 {
108  if(biasEnabled)
109  {
110  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
111  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
112  return bias;
113  }
114  else
115  {
116  return boost::multi_array<T, 1>();
117  }
118 }
119 
120 // Helper template that returns Bias2, Bias4 or Bias8 (selected by the number of output channels), or an empty vector if bias is disabled.
121 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
122 boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
123 {
124  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
125  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
126  const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
127 
128  switch (outputChannels)
129  {
130  case 2:
131  default:
132  {
133  return GetBias2<ArmnnType>(biasEnabled, qScale);
134  }
135  case 4:
136  {
137  return GetBias4<ArmnnType>(biasEnabled, qScale);
138  }
139  case 8:
140  {
141  return GetBias8<ArmnnType>(biasEnabled, qScale);
142  }
143  }
144 }
145 
146 //
147 // Implementation templates
148 //
149 
150 // Mapping from input type to bias type for fully connected layers.
151 // float => float, uint8_t => int32_t
152 template<typename T>
153 struct FullyConnectedBiasTypeForInputType;
154 
155 template<>
156 struct FullyConnectedBiasTypeForInputType<float>
157 {
158  using Type = float;
159 };
160 
161 template<>
162 struct FullyConnectedBiasTypeForInputType<uint8_t>
163 {
164  using Type = int32_t;
165 };
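// For example, FullyConnectedBiasTypeForInputType<uint8_t>::Type resolves to int32_t,
// matching the 32-bit (Signed32) bias tensors used alongside quantized 8-bit inputs.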
166 
167 // Modifies a std::vector in-place using a specified bias.
168 template<typename T, typename B>
169 void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
170  const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
171 {
172  BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
173  "Invalid type and parameter combination.");
174  BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
175  "Invalid type and parameter combination.");
176 
177  // Note we need to dequantize and re-quantize the image value and the bias.
178  for (uint32_t i = 0; i < bias.size(); ++i)
179  {
180  float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
181  for (uint32_t y = 0; y < h; ++y)
182  {
183  for (uint32_t x = 0; x < w; ++x)
184  {
185  uint32_t offset = (i * h + y) * w + x;
186  BOOST_ASSERT(offset < v.size());
187  T& outRef = v[offset];
188  float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
189  outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
190  }
191  }
192  }
193 }
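// Illustrative example of the arithmetic above (values chosen for this comment only):
// with vScale = 0.5 and vOffset = 0, a stored output value of 4 dequantizes to 2.0; a bias stored as 2
// with bScale = 0.25 dequantizes to 0.5; their sum 2.5 re-quantizes to round(2.5 / 0.5) + 0 = 5.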
194 
195 //
196 // Convolution2d implementations
197 //
198 
199 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
200  typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
201 LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
202  armnn::IWorkloadFactory& workloadFactory,
203  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
204  const boost::multi_array<T, 4>& originalInput,
205  const boost::multi_array<T, 4>& originalKernel,
206  const boost::multi_array<B, 1>& bias,
207  const boost::multi_array<T, 4>& originalOutputExpected,
208  float qScale,
209  int32_t qOffset,
210  const armnn::DataLayout layout,
211  uint32_t padLeft = 0,
212  uint32_t padTop = 0,
213  uint32_t padRight = 0,
214  uint32_t padBottom = 0,
215  uint32_t strideX = 1,
216  uint32_t strideY = 1,
217  uint32_t dilationX = 1,
218  uint32_t dilationY = 1)
219 {
220  boost::ignore_unused(memoryManager);
221  unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
222  unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
223  unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
224  unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
225 
226  unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
227  unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
228  unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
229  unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
230 
231  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
232  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
233  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
234  unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
235 
236  bool biasEnabled = bias.size() > 0;
237 
238  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
239  BOOST_ASSERT(inputNum == 1);
240  BOOST_ASSERT(outputNum == 1);
241 
242  // If a bias is used, its size must equal the number of output channels.
243  BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
244 
245 
246  // Note these tensors will use two (identical) batches.
247  armnn::TensorInfo inputTensorInfo =
248  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
249  armnn::TensorInfo outputTensorInfo =
250  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
251  armnn::TensorInfo kernelDesc =
252  armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
253  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
254 
255  // Set quantization parameters if the requested type is a quantized type.
256  if(armnn::IsQuantizedType<T>())
257  {
258  inputTensorInfo.SetQuantizationScale(qScale);
259  inputTensorInfo.SetQuantizationOffset(qOffset);
260  outputTensorInfo.SetQuantizationScale(qScale);
261  outputTensorInfo.SetQuantizationOffset(qOffset);
262  kernelDesc.SetQuantizationScale(qScale);
263  kernelDesc.SetQuantizationOffset(qOffset);
264  biasDesc.SetQuantizationScale(qScale*qScale);
265  biasDesc.SetQuantizationOffset(0);
266  }
267 
268  LayerTestResult<T, 4> ret(outputTensorInfo);
269 
270  // Construct input data - two batches of the same input image.
271  std::vector<T> inputImage;
272  inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
273  std::vector<T> inputData;
274  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
275  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
276 
277  // Permute the input data to NHWC if that layout is requested.
278  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
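 // Note: armnn::PermutationVector maps each source dimension index to its destination index,
 // so { 0, 3, 1, 2 } sends N->0, C->3, H->1, W->2, i.e. it re-lays NCHW data out as NHWC.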
279  if (layout == armnn::DataLayout::NHWC)
280  {
281  std::vector<T> tmp(inputData.size());
282  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
283  inputData = tmp;
284  }
285 
286  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
287 
288  std::vector<T> outputImage;
289  outputImage.assign(originalOutputExpected.data(),
290  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
291 
292  // Apply bias to output image if it is enabled.
293  if(biasEnabled)
294  {
295  std::vector<T> biasV;
296  biasV.assign(bias.data(), bias.data() + outputChannels);
297  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
298  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
299  outputWidth, outputHeight);
300  }
301 
302  // Construct expected output data - two identical images.
303  std::vector<T> outputData;
304  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
305  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
306 
307  // Permute the expected output to NHWC if that layout is requested.
308  if (layout == armnn::DataLayout::NHWC)
309  {
310  std::vector<T> tmp(outputData.size());
311  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
312  outputData = tmp;
313  }
314  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
315 
316  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
317  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
318 
319  armnn::Convolution2dQueueDescriptor data;
320  armnn::WorkloadInfo info;
321  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
322  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
323  // Permute the kernel if necessary
324  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
325  if (layout == armnn::DataLayout::NHWC)
326  {
327  armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
328  }
329  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
330 
331  if(biasEnabled)
332  {
333  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
334  }
335 
336  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
337  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
338 
339  data.m_Weight = &weightsTensor;
340  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
341  data.m_Parameters.m_StrideX = strideX;
342  data.m_Parameters.m_StrideY = strideY;
343  data.m_Parameters.m_PadLeft = padLeft;
344  data.m_Parameters.m_PadRight = padRight;
345  data.m_Parameters.m_PadTop = padTop;
346  data.m_Parameters.m_PadBottom = padBottom;
347  data.m_Parameters.m_BiasEnabled = biasEnabled;
348  data.m_Parameters.m_DataLayout = layout;
349  data.m_Parameters.m_DilationX = dilationX;
350  data.m_Parameters.m_DilationY = dilationY;
351 
352  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
353  inputHandle->Allocate();
354  outputHandle->Allocate();
355 
356  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
357 
358  ExecuteWorkload(*workload, memoryManager);
359 
360  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
361 
362  return ret;
363 }
364 
365 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
366  typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
367 LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
368  armnn::IWorkloadFactory& workloadFactory,
369  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
370  const boost::multi_array<T, 4>& input,
371  const boost::multi_array<T, 4>& kernel,
372  const boost::multi_array<B, 1>& bias,
373  const boost::multi_array<T, 4>& outputExpected,
374  const armnn::DataLayout dataLayout,
375  float qScale,
376  int32_t qOffset,
377  uint32_t padLeft = 1,
378  uint32_t padTop = 1,
379  uint32_t padRight = 1,
380  uint32_t padBottom = 1,
381  uint32_t strideX = 1,
382  uint32_t strideY = 1)
383 {
384  boost::ignore_unused(qScale, qOffset);
385  unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
386  unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
387  unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
388  unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
389 
390  unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
391  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
392  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
393  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
394 
395  unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
396  unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
397  unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
398  unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
399 
400  bool biasEnabled = bias.size() > 0;
401 
402  // Creates the tensors.
403  armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
404  armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
405  ArmnnType);
406  armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
407  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
408 
409  // Construct the input data.
410  std::vector<T> inputData;
411  inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
412  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
413 
414  // Construct the output data, with bias applied, as appropriate.
415  std::vector<T> outputData;
416  outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
417 
418  LayerTestResult<T, 4> ret(outputTensorInfo);
419  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
420 
421  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
422  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
423 
424  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
425  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
426 
427  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
428 
429  armnn::Convolution2dQueueDescriptor data;
430 
431  data.m_Weight = &weightsTensor;
432  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
433  data.m_Parameters.m_StrideX = strideX;
434  data.m_Parameters.m_StrideY = strideY;
435  data.m_Parameters.m_PadLeft = padLeft;
436  data.m_Parameters.m_PadRight = padRight;
437  data.m_Parameters.m_PadTop = padTop;
438  data.m_Parameters.m_PadBottom = padBottom;
439  data.m_Parameters.m_BiasEnabled = biasEnabled;
440  data.m_Parameters.m_DataLayout = dataLayout;
441 
442  armnn::WorkloadInfo info;
443  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
444  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
445 
446  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
447  inputHandle->Allocate();
448  outputHandle->Allocate();
449 
450  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
451 
452  ExecuteWorkload(*workload, memoryManager);
453 
454  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
455 
456  return ret;
457 }
458 
459 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
460 LayerTestResult<T, 4> Convolution1dTestImpl( // reconstructed declaration (was a hyperlink in the rendered page)
461  armnn::IWorkloadFactory& workloadFactory,
462  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
463  float qScale,
464  int32_t qOffset,
465  bool biasEnabled)
466 {
467  using B = armnn::ResolveType<ArmnnBType>;
468  // Until we have a specialist 1D convolution layer, we can fake one using
469  // 2D convolution with the final dimension set to 1.
470  // I don't anticipate this being particularly slow, given that convolution is implemented
471  // as a matrix multiplication, at which point dimension doesn't matter.
472 
473  unsigned int batchSize = 1;
474  unsigned int inputChannels = 2;
475  unsigned int outputChannels = 3;
476  unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
477  unsigned int kernelSize = 3;
478  unsigned int padSize = 2;
479  unsigned int stride = 1;
480  unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.
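 // Worked out with the values above: (5 + 2*2 - 3 + 1) / 1 = 7, which is the usual
 // convolution output-size formula floor((I + 2P - K) / S) + 1 applied along the faked height axis.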
481 
482  armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
483  armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
484  armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
485  armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
486 
487  // Set quantization parameters if the requested type is a quantized type.
488  if(armnn::IsQuantizedType<T>())
489  {
490  inputInfo.SetQuantizationScale(qScale);
491  inputInfo.SetQuantizationOffset(qOffset);
492  outputInfo.SetQuantizationScale(qScale);
493  outputInfo.SetQuantizationOffset(qOffset);
494  kernelInfo.SetQuantizationScale(qScale);
495  kernelInfo.SetQuantizationOffset(qOffset);
496  biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
497  biasInfo.SetQuantizationOffset(0);
498  }
499 
500  std::vector<T> inputData = QuantizedVector<T>(
501  {
502  5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
503  -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
504  },
505  inputInfo.GetQuantizationScale(),
506  inputInfo.GetQuantizationOffset());
507 
508  std::vector<T> kernelData = QuantizedVector<T>(
509  {
510  1.0f, 0.0f, 0.0f,
511  0.0f, 2.0f, -1.5f,
512 
513  0.0f, 0.0f, 0.0f,
514  0.2f, 0.2f, 0.2f,
515 
516  0.5f, 0.0f, 0.5f,
517  0.0f, -1.0f, 0.0f
518  },
519  kernelInfo.GetQuantizationScale(),
520  kernelInfo.GetQuantizationOffset());
521 
522  std::vector<B> biasData =
523  QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());
524 
525  std::vector<T> outputData = QuantizedVector<T>(
526  {
527  4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
528  -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
529  2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
530  },
531  outputInfo.GetQuantizationScale(),
532  outputInfo.GetQuantizationOffset());
533 
534  // Optionally apply bias to output image.
535  if(biasEnabled)
536  {
537  ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
538  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
539  1, outputSize);
540  }
541 
542  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
543  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
544 
545  armnn::Convolution2dQueueDescriptor data;
546  armnn::WorkloadInfo info;
547  armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
548  armnn::ScopedCpuTensorHandle biasTensor(biasInfo);
549 
550  AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
551  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
552 
553  AddInputToWorkload(data, info, inputInfo, inputHandle.get());
554  AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
555 
556  data.m_Weight = &weightsTensor;
557  data.m_Bias = &biasTensor;
558  data.m_Parameters.m_StrideX = 1;
559  data.m_Parameters.m_StrideY = stride;
560  data.m_Parameters.m_PadLeft = 0;
561  data.m_Parameters.m_PadRight = 0;
562  data.m_Parameters.m_PadTop = padSize;
563  data.m_Parameters.m_PadBottom = padSize;
564  data.m_Parameters.m_BiasEnabled = biasEnabled;
565 
566  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
567  inputHandle->Allocate();
568  outputHandle->Allocate();
569 
570  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
571 
572  ExecuteWorkload(*workload, memoryManager);
573 
574  // Output
575  LayerTestResult<T,4> ret(outputInfo);
576  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
577  ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
578  return ret;
579 }
580 
581 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
582 LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon( // reconstructed declaration (was a hyperlink in the rendered page)
583  armnn::IWorkloadFactory& workloadFactory,
584  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
585  float qScale,
586  int32_t qOffset,
587  bool biasEnabled,
588  armnn::DataLayout dataLayout)
589 {
590  boost::ignore_unused(biasEnabled);
591  // Use a single-batch, 1-channel 3x4 image (NHWC).
592 
593  armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
594  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
595  {
596  1, 5, 2, 3,
597  8, 7, 3, 6,
598  3, 3, 9, 1
599  });
600 
601 
602  // Use 1 batch of a 1-channel 3x3 kernel (NHWC).
603  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
604  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
605  4, 5, 6,
606  0, 0, 0,
607  3, 2, 1
608  });
609 
610  // Expected output is 1 batch of a 1-channel 3x4 image (NHWC).
611  armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
612 
613  const std::vector<float> outputData =
614  {
615  23, 41, 33, 21,
616  44, 65, 76, 52,
617  82, 85, 79, 42
618  };
619 
620  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
621 
622  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
623  workloadFactory,
624  memoryManager,
625  input,
626  kernel,
627  boost::multi_array<T, 1>(),
628  expectedOutput,
629  dataLayout,
630  qScale,
631  qOffset);
632 }
633 
634 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
635 LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon( // reconstructed declaration (was a hyperlink in the rendered page)
636  armnn::IWorkloadFactory& workloadFactory,
637  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
638  float qScale,
639  int32_t qOffset,
640  bool biasEnabled,
641  const armnn::DataLayout& dataLayout)
642 {
643  boost::ignore_unused(biasEnabled);
644 
645  // Input is a single-batch, 1 channel, 5x5 image.
646  armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
647  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
648  {
649  1, 5, 2, 3, 5,
650  8, 7, 3, 6, 3,
651  3, 3, 9, 1, 9,
652  4, 1, 8, 1, 3,
653  6, 8, 1, 9, 2
654  });
655 
656  // Use a 3x3 kernel.
657  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
658  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
659  {
660  4, 5, 6,
661  0, 0, 0,
662  3, 2, 1
663  });
664 
665  // Expected output is a single-batch, 1 channel, 3x3 image.
666  armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
667 
668  const std::vector<T> outputData =
669  {
670  23, 33, 24,
671  91, 99, 48,
672  26, 50, 19
673  };
674 
675  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
676 
677  uint32_t padLeft = 1;
678  uint32_t padTop = 1;
679  uint32_t padRight = 1;
680  uint32_t padBottom = 1;
681  uint32_t strideX = 2;
682  uint32_t strideY = 2;
683 
684  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
685  workloadFactory,
686  memoryManager,
687  input,
688  kernel,
689  boost::multi_array<T, 1>(),
690  expectedOutput,
691  dataLayout,
692  qScale,
693  qOffset,
694  padLeft,
695  padTop,
696  padRight,
697  padBottom,
698  strideX,
699  strideY);
700 }
701 
702 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
703 LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon( // reconstructed declaration (was a hyperlink in the rendered page)
704  armnn::IWorkloadFactory& workloadFactory,
705  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
706  float qScale,
707  int32_t qOffset,
708  bool biasEnabled,
709  const armnn::DataLayout layout)
710 {
711  // Use common single-batch 3-channel 16x8 image.
712  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
713  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
714 
715  // Use a 2-element batch with 3-channel 3x5 kernels.
716  armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
717  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
718  QuantizedVector<T>({
719  1, 1, 1,
720  1, -1, 1,
721  1, 1, 1,
722  1, 1, 1,
723  1, 1, 1,
724 
725  0, 0, 0,
726  0, 0, 0,
727  0, 0, 0,
728  0, 0, 0,
729  0, 0, 0,
730 
731  2, 2, 2,
732  2, 2, 2,
733  2, 2, 2,
734  2, 2, 2,
735  2, 2, 2,
736 
737 
738  0, 0, 0,
739  0, 0, 0,
740  0, 0, 0,
741  0, 0, 0,
742  0, 0, 0,
743 
744  1, 1, 1,
745  1, 1, 1,
746  1, 1, 1,
747  1, 1, 1,
748  1, 1, 1,
749 
750  0, 0, 0,
751  0, 0, 0,
752  0, 0, 0,
753  0, 0, 0,
754  0, 0, 0
755  },
756  qScale, qOffset)));
757 
758  // Expected output is 1 batch of a 2-channel 14x4 image.
759  armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
760  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
761  QuantizedVector<T>({
762  -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
763  -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
764  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
765  -23.5f, -23.5f, -23.5f,
766  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
767  -23.5f, -23.5f, -23.5f,
768 
769  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
770  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
771  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
772  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
773  },
774  qScale, qOffset)));
775 
776  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
777  workloadFactory,
778  memoryManager,
779  input,
780  kernel,
781  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
782  expectedOutput,
783  qScale,
784  qOffset,
785  layout);
786 }
787 
788 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
789  typename T = armnn::ResolveType<ArmnnType>>
790 LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon( // reconstructed declaration (was a hyperlink in the rendered page)
791  armnn::IWorkloadFactory& workloadFactory,
792  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
793  float qScale,
794  int32_t qOffset,
795  bool biasEnabled,
796  const armnn::DataLayout layout)
797 {
798  // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
799 
800  // Use common single-batch 3-channel 16x8 image.
801  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
802  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
803 
804  // Use a 2-element batch of 3-channel 3x3 kernels.
805  armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
806  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
807  QuantizedVector<T>({
808  1, 1, 1,
809  1, -1, 1,
810  1, 1, 1,
811 
812  0, 0, 0,
813  0, 0, 0,
814  0, 0, 0,
815 
816  2, 2, 2,
817  2, 2, 2,
818  2, 2, 2,
819 
820 
821  0, 0, 0,
822  0, 0, 0,
823  0, 0, 0,
824 
825  1, 1, 1,
826  1, 1, 1,
827  1, 1, 1,
828 
829  0, 0, 0,
830  0, 0, 0,
831  0, 0, 0
832  },
833  qScale, qOffset)));
834 
835  // Expected output is 1 batch of a 2-channel 14x6 image.
836  armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
837  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
838  QuantizedVector<T>({
839  -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
840  -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
841  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
842  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
843  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
844  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
845 
846  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
847  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
848  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
849  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
850  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
851  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
852  },
853  qScale, qOffset)));
854 
855  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
856  workloadFactory,
857  memoryManager,
858  input,
859  kernel,
860  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
861  expectedOutput,
862  qScale,
863  qOffset,
864  layout);
865 }
866 
867 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
868  typename T = armnn::ResolveType<ArmnnType>>
869 LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon( // reconstructed declaration (was a hyperlink in the rendered page)
870  armnn::IWorkloadFactory& workloadFactory,
871  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
872  const armnn::DataLayout layout,
873  float qScale,
874  int32_t qOffset)
875 {
876  // Use a single-batch 1-channel 3x3 image as input.
877  armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
878  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
879  QuantizedVector<T>({
880  11,21,31,
881  12,22,32,
882  13,23,33
883  },
884  qScale, qOffset)));
885 
886  // Use 1 batch of a 1-channel 2x2 kernel.
887  armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
888  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
889  QuantizedVector<T>({
890  -11,-21,
891  -12,-22,
892  },
893  qScale, qOffset)));
894 
895 // Expected output is 1 batch of a 1-channel 6x8 image.
896 // Manually calculated like this:
897 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
898 //[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
899 //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
900 //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
901 //[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
902 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
903 //[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
904  armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
905  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
906  QuantizedVector<T>({
907  0, 0, 0, 0, 0, 0,
908  -242, -594, -934, -372, 0, 0,
909  -495, -1190, -1850, -725, 0, 0,
910  -538, -1256, -1916, -748, 0, 0,
911  -273, -626, -946, -363, 0, 0,
912  0, 0, 0, 0, 0, 0,
913  0, 0, 0, 0, 0, 0,
914  0, 0, 0, 0, 0, 0
915  },
916  qScale, qOffset)));
917 
918  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
919  workloadFactory,
920  memoryManager,
921  input,
922  kernel,
923  GetBias2<ArmnnBType>(false, qScale * qScale),
924  expectedOutput,
925  qScale,
926  qOffset,
927  layout,
928  1, // Padding left.
929  2, // Padding top.
930  3, // Padding right.
931  4); // Padding bottom.
932 }
933 
934 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
935  typename T = armnn::ResolveType<ArmnnType>>
936 LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon( // reconstructed declaration (was a hyperlink in the rendered page)
937  armnn::IWorkloadFactory& workloadFactory,
938  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
939  const armnn::DataLayout layout,
940  float qScale,
941  int32_t qOffset)
942 {
943  // Use a single-batch 1-channel 5x5 image as input.
944  armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
945  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
946  QuantizedVector<T>({
947  11,21,31,41,51,
948  12,22,32,42,52,
949  13,23,33,43,53,
950  14,24,34,44,54,
951  15,25,35,45,55,
952  }, qScale, qOffset)));
953 
954  // Use 1 batch of a 1-channel 4x4 kernel.
955  armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
956  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
957  QuantizedVector<T>({
958  -11,-21,-31,-41,
959  -12,-22,-32,-42,
960  -13,-23,-33,-43,
961  -14,-24,-34,-44,
962  },
963  qScale, qOffset)));
964 
965  // Expected output is 1 batch of a 1-channel 5x5 image.
966  armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
967  std::vector<T> myVec(outputDesc.GetNumElements(), 0);
968  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
969  QuantizedVector<T>({
970  -7140, -10580, -13940, -9300, -5230,
971  -9590, -14120, -18520, -12290, -6860,
972  -9980, -14560, -18960, -12560, -7000,
973  -7518, -10904, -14144, -9318, -5152,
974  -5032, -7256, -9376, -6142, -3368,
975  },
976  qScale, qOffset)));
977 
978  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
979  workloadFactory,
980  memoryManager,
981  input,
982  kernel,
983  GetBias2<ArmnnBType>(false, qScale * qScale),
984  expectedOutput,
985  qScale,
986  qOffset,
987  layout,
988  1, // Padding left.
989  1, // Padding top.
990  2, // Padding right.
991  2); // Padding bottom.
992 }
993 
994 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
995 LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
996  armnn::IWorkloadFactory& workloadFactory,
997  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
998  const std::vector<float>& inputNoQuantizedValues,
999  armnn::TensorInfo& inputTensorInfo,
1000  const std::vector<float>& kernelNoQuantizedValues,
1001  armnn::TensorInfo& kernelTensorInfo,
1002  const std::vector<float>& outputExpectedNoQuantizedValues,
1003  armnn::TensorInfo& outputTensorInfo,
1004  uint32_t dilationX,
1005  uint32_t dilationY,
1006  const armnn::DataLayout layout = armnn::DataLayout::NCHW,
1007  uint32_t padLeft = 0,
1008  uint32_t padTop = 0,
1009  uint32_t padRight = 0,
1010  uint32_t padBottom = 0,
1011  uint32_t strideX = 1,
1012  uint32_t strideY = 1,
1013  bool biasEnabled = false
1014 )
1015 {
1016  float qScale;
1017  int32_t qOffset;
1018  switch (ArmnnType)
1019  {
1020  case armnn::DataType::QAsymmU8: // enum name assumed; older ArmNN releases call this QuantisedAsymm8
1021  {
1022  qScale = 0.1f;
1023  qOffset = 128;
1024  break;
1025  }
1026  case armnn::DataType::QSymmS16: // enum name assumed; older ArmNN releases call this QuantisedSymm16
1027  {
1028  qScale = 0.1f;
1029  qOffset = 0;
1030  break;
1031  }
1033  default:
1034  {
1035  qScale = 0.f;
1036  qOffset = 0;
1037  break;
1038  }
1039  }
1040 
1041  inputTensorInfo.SetQuantizationScale(qScale);
1042  inputTensorInfo.SetQuantizationOffset(qOffset);
1043  kernelTensorInfo.SetQuantizationScale(qScale);
1044  kernelTensorInfo.SetQuantizationOffset(qOffset);
1045  outputTensorInfo.SetQuantizationScale(qScale);
1046  outputTensorInfo.SetQuantizationOffset(qOffset);
1047 
1048  auto input = MakeTensor<T, 4>(inputTensorInfo,
1049  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
1050  inputTensorInfo.GetQuantizationScale(),
1051  inputTensorInfo.GetQuantizationOffset())));
1052  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
1053  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
1054  kernelTensorInfo.GetQuantizationScale(),
1055  kernelTensorInfo.GetQuantizationOffset())));
1056  auto expectedOutput =
1057  MakeTensor<T, 4>(outputTensorInfo,
1058  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
1059  outputTensorInfo.GetQuantizationScale(),
1060  outputTensorInfo.GetQuantizationOffset())));
1061 
1062  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
1063  workloadFactory,
1064  memoryManager,
1065  input,
1066  kernel,
1067  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
1068  expectedOutput,
1069  qScale,
1070  qOffset,
1071  layout,
1072  padLeft,
1073  padTop,
1074  padRight,
1075  padBottom,
1076  strideX,
1077  strideY,
1078  dilationX,
1079  dilationY);
1080 }
1081 
1082 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1083 LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test( // reconstructed declaration (was a hyperlink in the rendered page)
1084  armnn::IWorkloadFactory& workloadFactory,
1085  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1086  bool biasEnabled,
1087  const armnn::DataLayout layout)
1088 {
1089  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1090  std::vector<float> inputNoQuantizedValues =
1091  {
1092  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1093  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1094  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1095  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1096  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1097  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1098  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1099  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1100  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1101  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1102  };
1103 
1104  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
1105  std::vector<float> kernelNoQuantizedValues =
1106  {
1107  1, 2, 3,
1108  4, 5, 6,
1109  7, 8, 9
1110  };
1111 
1112  // Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
1113  // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
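 // Worked through with the values above: the dilated kernel size is d(K-1)+1 = 3*(3-1)+1 = 7,
 // so the output size per dimension is (10 - 7 + 0)/1 + 1 = 4.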
1114  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1115  std::vector<float> outputExpectedNoQuantizedValues =
1116  {
1117  6., 5., 5., 5.,
1118  6., 5., 5., 5.,
1119  6., 5., 5., 5.,
1120  3., 2., 2., 2.
1121  };
1122 
1123  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1124  workloadFactory,
1125  memoryManager,
1126  inputNoQuantizedValues,
1127  inputTensorInfo,
1128  kernelNoQuantizedValues,
1129  kernelTensorInfo,
1130  outputExpectedNoQuantizedValues,
1131  outputTensorInfo,
1132  3,
1133  3,
1134  layout,
1135  biasEnabled);
1136 }
1137 
1138 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1139 LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test( // reconstructed declaration (was a hyperlink in the rendered page)
1140  armnn::IWorkloadFactory& workloadFactory,
1141  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1142  bool biasEnabled,
1143  const armnn::DataLayout layout)
1144 {
1145  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
1146  std::vector<float> inputNoQuantizedValues =
1147  {
1148  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1149  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1150  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1151  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1152  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1153  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1154  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1155  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1156  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1157  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1158 
1159  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1160  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1161  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1162  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1163  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1164  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1165  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1166  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1167  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1168  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1169  };
1170 
1171  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
1172  std::vector<float> kernelNoQuantizedValues =
1173  {
1174  1, 2, 3,
1175  4, 5, 6,
1176  7, 8, 9,
1177 
1178  1, 2, 3,
1179  4, 5, 6,
1180  7, 8, 9
1181  };
1182 
1183  // Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
1184  // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
1185  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1186  std::vector<float> outputExpectedNoQuantizedValues =
1187  {
1188  12., 10., 10., 10.,
1189  12., 10., 10., 10.,
1190  12., 10., 10., 10.,
1191  6., 4., 4., 4.
1192  };
1193 
1194  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1195  workloadFactory,
1196  memoryManager,
1197  inputNoQuantizedValues,
1198  inputTensorInfo,
1199  kernelNoQuantizedValues,
1200  kernelTensorInfo,
1201  outputExpectedNoQuantizedValues,
1202  outputTensorInfo,
1203  3,
1204  3,
1205  layout,
1206  biasEnabled);
1207 }
1208 
1209 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1210 LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test( // reconstructed declaration (was a hyperlink in the rendered page)
1211  armnn::IWorkloadFactory &workloadFactory,
1212  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1213  bool biasEnabled,
1214  const armnn::DataLayout layout)
1215 {
1216  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1217  std::vector<float> inputNoQuantizedValues =
1218  {
1219  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1220  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1221  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1222  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1223  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1224  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1225  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1226  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1227  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1228  1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1229  };
1230 
1231  armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
1232  std::vector<float> kernelNoQuantizedValues =
1233  {
1234  1, 2,
1235  3, 4
1236  };
1237 
1238  // Since the dilation rate is 2 this dilates the 2x2 kernel to an effective 3x3: d(K-1)+1 --> 2 x (2-1) + 1 = 3.
1239  // The output is therefore 4x4: trunc((I - K + 2P)/S) + 1 => trunc((10 - 3 + 2x1)/3) + 1 = 3 + 1 = 4,
1240  // where dilation d = 2; dilated kernel size K = 3; input size I = 10; padding per side P = 1; stride S = 3.
1241  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1242  std::vector<float> outputExpectedNoQuantizedValues =
1243  {
1244  4, 7, 7, 3,
1245  6, 10, 10, 4,
1246  6, 10, 10, 4,
1247  2, 3, 3, 1
1248  };
1249  uint32_t padLeft = 1;
1250  uint32_t padTop = 1;
1251  uint32_t padRight = 1;
1252  uint32_t padBottom = 1;
1253 
1254  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1255  workloadFactory,
1256  memoryManager,
1257  inputNoQuantizedValues,
1258  inputTensorInfo,
1259  kernelNoQuantizedValues,
1260  kernelTensorInfo,
1261  outputExpectedNoQuantizedValues,
1262  outputTensorInfo,
1263  2,
1264  2,
1265  layout,
1266  padLeft,
1267  padTop,
1268  padRight,
1269  padBottom,
1270  3,
1271  3,
1272  biasEnabled
1273  );
1274 }
1275 
1276 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
1277 LayerTestResult<T, 4> CompareConvolution2dTestImpl( // reconstructed declaration (was a hyperlink in the rendered page)
1278  armnn::IWorkloadFactory& workloadFactory,
1279  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1280  armnn::IWorkloadFactory& refWorkloadFactory)
1281 {
1282  unsigned int inputHeight = 8;
1283  unsigned int inputWidth = 16;
1284  unsigned int inputChannels = 3;
1285  unsigned int inputNum = 5;
1286 
1287  unsigned int kernelHeight = 3;
1288  unsigned int kernelWidth = 3;
1289 
1290  unsigned int strideX = 2;
1291  unsigned int strideY = 3;
1292  unsigned int padX = 1;
1293  unsigned int padY = 1;
1294 
1295  unsigned int outputNum = inputNum;
1296  unsigned int outputChannels = 2;
1297  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
1298  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
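 // With the values above (integer division): outputHeight = (8 + 2*1 - 3 + 3)/3 = 10/3 = 3,
 // and outputWidth = (16 + 2*1 - 3 + 2)/2 = 17/2 = 8.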
1299 
1300  armnn::TensorInfo inputTensorInfo;
1301  armnn::TensorInfo outputTensorInfo;
1302  armnn::TensorInfo kernelDesc;
1303  armnn::TensorInfo biasDesc;
1304 
1305  unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
1306  unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1307  unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1308  unsigned int biasShape[] = {outputChannels};
1309 
1310  inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1311  outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1312  kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1313  biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1314 
1315  LayerTestResult<T,4> ret(outputTensorInfo);
1316 
1317  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
1318  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
1319  auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);
1320 
1321  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1322  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1323 
1324  armnn::Convolution2dQueueDescriptor data;
1325  armnn::WorkloadInfo info;
1326  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1327  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1328 
1329  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1330  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1331 
1332  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1333  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1334  data.m_Weight = &weightsTensor;
1335  data.m_Bias = &biasTensor;
1336  data.m_Parameters.m_StrideX = strideX;
1337  data.m_Parameters.m_StrideY = strideY;
1338  data.m_Parameters.m_PadLeft = padX;
1339  data.m_Parameters.m_PadRight = padX;
1340  data.m_Parameters.m_PadTop = padY;
1341  data.m_Parameters.m_PadBottom = padY;
1342  data.m_Parameters.m_BiasEnabled = true;
1343 
1344  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1345  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
1346 
1347  armnn::Convolution2dQueueDescriptor refData = data;
1348  armnn::WorkloadInfo refInfo = info;
1349  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1350  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1351 
1352  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
1353  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
1354 
1355  outputHandleRef->Allocate();
1356  inputHandleRef->Allocate();
1357 
1358  inputHandle->Allocate();
1359  outputHandle->Allocate();
1360 
1361  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1362  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1363 
1364  ExecuteWorkload(*workload, memoryManager);
1365 
1366  workloadRef->PostAllocationConfigure();
1367  workloadRef->Execute();
1368 
1369  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1370  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1371 
1372  return ret;
1373 }
1374 
1375 //
1376 // DepthwiseConvolution2d implementations
1377 //
1378 
1379 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1380  typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1381 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl( // reconstructed declaration (was a hyperlink in the rendered page)
1382  armnn::IWorkloadFactory& workloadFactory,
1383  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1384  const boost::multi_array<T, 4>& input,
1385  const boost::multi_array<T, 4>& kernel,
1386  const boost::multi_array<B, 1>& bias,
1387  const boost::multi_array<T, 4>& outputExpected,
1388  float qScale,
1389  int32_t qOffset,
1390  const armnn::DataLayout layout,
1391  uint32_t padLeft = 0,
1392  uint32_t padTop = 0,
1393  uint32_t padRight = 0,
1394  uint32_t padBottom = 0,
1395  uint32_t strideX = 1,
1396  uint32_t strideY = 1)
1397 {
1398  unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1399  unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1400  unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1401  unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1402  unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1403  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1404  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1405  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1406  unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1407  unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1408  unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1409  unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1410 
1411  // If a bias is used, its size must equal the number of output channels.
1412  bool biasEnabled = bias.size() > 0;
1413  BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1414 
1415  // Creates the tensors.
1416  armnn::TensorInfo inputTensorInfo =
1417  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1418  armnn::TensorInfo outputTensorInfo =
1419  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1420  armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1421  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1422 
1423  // Set quantization parameters if the requested type is a quantized type.
1424  if (armnn::IsQuantizedType<T>())
1425  {
1426  inputTensorInfo.SetQuantizationScale(qScale);
1427  inputTensorInfo.SetQuantizationOffset(qOffset);
1428  outputTensorInfo.SetQuantizationScale(qScale);
1429  outputTensorInfo.SetQuantizationOffset(qOffset);
1430  kernelDesc.SetQuantizationScale(qScale);
1431  kernelDesc.SetQuantizationOffset(qOffset);
1432  biasDesc.SetQuantizationScale(qScale*qScale);
1433  biasDesc.SetQuantizationOffset(0);
1434  }
1435 
1436  // Construct the input data.
1437  std::vector<T> inputData;
1438  inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1439 
1440  // Permute the input data to NHWC if that layout is requested.
1441  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1442  if (layout == armnn::DataLayout::NHWC)
1443  {
1444  std::vector<T> tmp(inputData.size());
1445  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1446  inputData = tmp;
1447  }
1448 
1449  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1450 
1451  // Construct the output data, with bias applied, as appropriate.
1452  std::vector<T> outputData;
1453  outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1454  if (biasEnabled)
1455  {
1456  std::vector<T> biasV;
1457  biasV.assign(bias.data(), bias.data() + outputChannels);
1458  ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1459  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1460  outputWidth, outputHeight);
1461  }
1462 
1463  LayerTestResult<T, 4> ret(outputTensorInfo);
1464 
1465  // Permute the expected output to NHWC if that layout is requested.
1466  if (layout == armnn::DataLayout::NHWC)
1467  {
1468  std::vector<T> tmp(outputData.size());
1469  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1470  outputData = tmp;
1471  }
1472 
1473  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1474 
1475  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1476  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1477 
1478  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1479 
1480  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1481 
1482  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1483  if (biasEnabled)
1484  {
1485  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1486  }
1487  }
1488  armnn::DepthwiseConvolution2dQueueDescriptor data;
1489  data.m_Weight = &weightsTensor;
1490  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
1491  data.m_Parameters.m_StrideX = strideX;
1492  data.m_Parameters.m_StrideY = strideY;
1493  data.m_Parameters.m_PadLeft = padLeft;
1494  data.m_Parameters.m_PadRight = padRight;
1495  data.m_Parameters.m_PadTop = padTop;
1496  data.m_Parameters.m_PadBottom = padBottom;
1497  data.m_Parameters.m_BiasEnabled = biasEnabled;
1498  data.m_Parameters.m_DataLayout = layout;
1499 
1500  armnn::WorkloadInfo info;
1501  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1502  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1503 
1504  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1505  inputHandle->Allocate();
1506  outputHandle->Allocate();
1507 
1508  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1509 
1510  ExecuteWorkload(*workload, memoryManager);
1511 
1512  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1513 
1514  return ret;
1515 }
1516 
1517 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1518 LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl( // reconstructed declaration (was a hyperlink in the rendered page)
1519  armnn::IWorkloadFactory& workloadFactory,
1520  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1521  float qScale,
1522  int32_t qOffset,
1523  bool biasEnabled,
1524  const armnn::DataLayout layout)
1525 {
1526  using B = armnn::ResolveType<ArmnnBType>;
1527 
1528  unsigned int inputHeight = 3;
1529  unsigned int inputWidth = 3;
1530  unsigned int inputChannels = 2;
1531  unsigned int inputNum = 1;
1532 
1533  unsigned int kernelHeight = 3;
1534  unsigned int kernelWidth = 3;
1535  unsigned int kernelChannels = inputChannels;
1536  unsigned int kernelDepthMultiplier = 1;
1537 
1538  unsigned int outputHeight = 1;
1539  unsigned int outputWidth = 1;
1540  unsigned int outputChannels = kernelChannels;
1541  unsigned int outputNum = inputNum;
1542 
1543  armnn::TensorInfo inputTensorInfo =
1544  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1545  armnn::TensorInfo outputTensorInfo =
1546  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1547  armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1548  ArmnnType);
1549  armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1550 
1551  // Set quantization parameters if the requested type is a quantized type.
1552  if(armnn::IsQuantizedType<T>())
1553  {
1554  inputTensorInfo.SetQuantizationScale(qScale);
1555  inputTensorInfo.SetQuantizationOffset(qOffset);
1556  outputTensorInfo.SetQuantizationScale(qScale);
1557  outputTensorInfo.SetQuantizationOffset(qOffset);
1558  kernelDesc.SetQuantizationScale(qScale);
1559  kernelDesc.SetQuantizationOffset(qOffset);
1560  biasDesc.SetQuantizationScale(qScale*qScale);
1561  biasDesc.SetQuantizationOffset(0);
1562  }
1563  std::vector<T> inputData = std::vector<T>(
1564  QuantizedVector<T>({
1565  1.f, 2.f, 1.f,
1566  2.f, 1.f, 2.f,
1567  1.f, 2.f, 1.f,
1568 
1569  1.f, 2.f, 1.f,
1570  2.f, 1.f, 2.f,
1571  1.f, 2.f, 1.f,
1572  },
1573  inputTensorInfo.GetQuantizationScale(),
1574  inputTensorInfo.GetQuantizationOffset()));
1575 
1576  // Permute the input data to NHWC if that layout is requested.
1577  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1578  if (layout == armnn::DataLayout::NHWC)
1579  {
1580  std::vector<T> tmp(inputData.size());
1581  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1582  inputData = tmp;
1583  }
1584  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1585 
1586  std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1587  biasDesc.GetQuantizationScale(),
1588  biasDesc.GetQuantizationOffset()));
1589 
1590  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1591 
1592  std::vector<T> kernelData = std::vector<T>(
1593  QuantizedVector<T>({
1594  1.f, 0.f, 1.f,
1595  0.f, 0.f, 0.f,
1596  -1.f, 0.f, -1.f,
1597 
1598  1.f, 0.f, 1.f,
1599  0.f, 0.f, 0.f,
1600  -1.f, 0.f, -1.f,
1601  },
1602  kernelDesc.GetQuantizationScale(),
1603  kernelDesc.GetQuantizationOffset()));
1604 
1605  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1606 
1607  // Manually calculated.
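      // Both channels come out to zero: the kernel's top row (+1, 0, +1) and bottom row (-1, 0, -1)
      // cancel over the vertically symmetric input rows.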
1608  std::vector<T> outputImage(
1609  QuantizedVector<T>({ 0.f, 0.f },
1610  outputTensorInfo.GetQuantizationScale(),
1611  outputTensorInfo.GetQuantizationOffset())
1612  );
1613 
1614  // Optionally apply bias to output image.
1615  if(biasEnabled)
1616  {
1617  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1618  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1619  outputWidth, outputHeight);
1620  }
1621 
1622  LayerTestResult<T, 4> ret(outputTensorInfo);
1623  if (layout == armnn::DataLayout::NHWC)
1624  {
1625  std::vector<T> tmp(outputImage.size());
1626  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1627  outputImage = tmp;
1628  }
1629 
1630  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1631 
1632  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1633  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1634 
1637  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1638  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1639 
1640  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1641  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1642 
1643  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1644  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1645 
1646  data.m_Weight = &weightsTensor;
1647  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1648  data.m_Parameters.m_StrideX = 1;
1649  data.m_Parameters.m_StrideY = 1;
1650  data.m_Parameters.m_PadLeft = 0;
1651  data.m_Parameters.m_PadRight = 0;
1652  data.m_Parameters.m_PadTop = 0;
1653  data.m_Parameters.m_PadBottom = 0;
1654  data.m_Parameters.m_BiasEnabled = biasEnabled;
1655  data.m_Parameters.m_DataLayout = layout;
1656 
1657  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1658  inputHandle->Allocate();
1659  outputHandle->Allocate();
1660 
1661  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1662 
1663  ExecuteWorkload(*workload, memoryManager);
1664 
1665  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1666 
1667  return ret;
1668 }
1669 
1670 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1672  armnn::IWorkloadFactory& workloadFactory,
1674  float qScale,
1675  int32_t qOffset,
1676  bool biasEnabled,
1677  const armnn::DataLayout layout)
1678 {
1680 
1681  unsigned int depthMultiplier = 2;
1682 
1683  unsigned int inputHeight = 8;
1684  unsigned int inputWidth = 16;
1685  unsigned int inputChannels = 2;
1686  unsigned int inputBatchSize = 1;
1687 
1688  unsigned int kernelHeight = 5;
1689  unsigned int kernelWidth = 3;
1690 
1691  unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1692  unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
1693  unsigned int outputChannels = inputChannels * depthMultiplier;
1694  unsigned int outputBatchSize = inputBatchSize;
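      // These follow the usual formula out = (in + padBefore + padAfter - kernel) / stride + 1 with the
      // stride/padding set further down: height = (8 + 1 + 1 - 5) / 1 + 1 = 6 (padTop = padBottom = 1,
      // strideY = 1) and width = (16 + 0 - 3) / 2 + 1 = 7 (no horizontal padding, strideX = 2).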
1695 
1696  armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1697  inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1698  armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1699  outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1700  armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1701  ArmnnType);
1702  armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1703 
1704  // Set quantization parameters if the requested type is a quantized type.
1705  if(armnn::IsQuantizedType<T>())
1706  {
1707  inputTensorInfo.SetQuantizationScale(qScale);
1708  inputTensorInfo.SetQuantizationOffset(qOffset);
1709  outputTensorInfo.SetQuantizationScale(qScale);
1710  outputTensorInfo.SetQuantizationOffset(qOffset);
1711  kernelDesc.SetQuantizationScale(qScale);
1712  kernelDesc.SetQuantizationOffset(qOffset);
1713  biasDesc.SetQuantizationScale(qScale*qScale);
1714  biasDesc.SetQuantizationOffset(0);
1715  }
1716 
1717  // NOTE: originalInputData is in NCHW format
1718  std::vector<T> originalInputData = std::vector<T>(
1719  QuantizedVector<T>({
1720  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1721  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1722  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1723  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1724  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1725  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1726  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1727  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1728  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1729  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1730  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1731  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1732  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1733  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1734  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1735  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1736  },
1737  inputTensorInfo.GetQuantizationScale(),
1738  inputTensorInfo.GetQuantizationOffset()));
1739 
1740  std::vector<T> inputData = originalInputData;
1741  // At this point, permute the input data if the requested layout requires it.
1742  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1743  if (layout == armnn::DataLayout::NHWC)
1744  {
1745  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1746  originalInputData.data(), inputData.data(), sizeof(T));
1747  }
1748  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1749 
1750  std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
1751  biasDesc.GetQuantizationScale(),
1752  biasDesc.GetQuantizationOffset());
1753 
1754  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1755 
1756  std::vector<T> kernelData = std::vector<T>(
1757  QuantizedVector<T>({
1758  1, 1, 1,
1759  1, -1, 1,
1760  1, 1, 1,
1761  1, 1, 1,
1762  1, 1, 1,
1763 
1764  2, 2, 2,
1765  2, 2, 2,
1766  2, 2, 2,
1767  2, 2, 2,
1768  2, 2, 2,
1769 
1770  0, 0, 0,
1771  0, -1, 0,
1772  0, 0, 0,
1773  0, 0, 0,
1774  0, 0, 0,
1775 
1776  0, 0, 0,
1777  0, 0, 0,
1778  0, 1, 0,
1779  0, 0, 0,
1780  0, 0, 0
1781  },
1782  kernelDesc.GetQuantizationScale(),
1783  kernelDesc.GetQuantizationOffset()));
1784 
1785  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1786 
1787  // Manually calculated.
1788  std::vector<T> originalOutputImage = std::vector<T>(
1789  QuantizedVector<T>({
1790  3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
1791  6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
1792  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1793  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1794  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1795  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1796 
1797  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1798  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1799  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1800  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1801  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1802  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1803 
1804  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1805  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1806  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1807  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1808  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1809  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1810 
1811  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1812  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1813  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1814  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1815  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1816  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1817  },
1818  outputTensorInfo.GetQuantizationScale(),
1819  outputTensorInfo.GetQuantizationOffset()));
1820 
1821  // Optionally apply bias to output image.
1822  if(biasEnabled)
1823  {
1824  ApplyBias(originalOutputImage,
1825  outputTensorInfo.GetQuantizationScale(),
1826  outputTensorInfo.GetQuantizationOffset(),
1827  biasV,
1828  biasDesc.GetQuantizationScale(),
1829  biasDesc.GetQuantizationOffset(),
1830  outputWidth,
1831  outputHeight);
1832  }
1833 
1834  LayerTestResult<T, 4> ret(outputTensorInfo);
1835  std::vector<T> outputImage = originalOutputImage;
1836  if (layout == armnn::DataLayout::NHWC)
1837  {
1838  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
1839  originalOutputImage.data(), outputImage.data(), sizeof(T));
1840  }
1841 
1842  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1843 
1844  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1845  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1846 
1849  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1850  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1851 
1852  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1853  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1854 
1855  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1856  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1857 
1858  data.m_Weight = &weightsTensor;
1859  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1860  data.m_Parameters.m_StrideX = 2;
1861  data.m_Parameters.m_StrideY = 1;
1862  data.m_Parameters.m_PadLeft = 0;
1863  data.m_Parameters.m_PadRight = 0;
1864  data.m_Parameters.m_PadTop = 1;
1865  data.m_Parameters.m_PadBottom = 1;
1866  data.m_Parameters.m_BiasEnabled = biasEnabled;
1867  data.m_Parameters.m_DataLayout = layout;
1868 
1869  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1870  inputHandle->Allocate();
1871  outputHandle->Allocate();
1872 
1873  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1874 
1875  ExecuteWorkload(*workload, memoryManager);
1876 
1877  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1878 
1879  return ret;
1880 }
1881 
1882 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1885  armnn::IWorkloadFactory& workloadFactory,
1887  const boost::multi_array<T, 4>& originalInput,
1888  const boost::multi_array<T, 4>& originalKernel,
1889  const boost::multi_array<B, 1>& bias,
1890  const boost::multi_array<T, 4>& originalOutputExpected,
1891  float qScale,
1892  int32_t qOffset,
1894  uint32_t padLeft = 0,
1895  uint32_t padTop = 0,
1896  uint32_t padRight = 0,
1897  uint32_t padBottom = 0,
1898  uint32_t strideX = 1,
1899  uint32_t strideY = 1,
1900  uint32_t dilationX = 1,
1901  uint32_t dilationY = 1)
1902 {
1903  unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
1904  unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
1905  unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
1906  unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
1907 
1908  unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
1909  unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
1910  unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
1911  unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
1912 
1913  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
1914  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
1915  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
1916  unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
1917 
1918  bool biasEnabled = bias.size() > 0;
1919 
1920  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
1921  BOOST_ASSERT(inputNum == 1);
1922  BOOST_ASSERT(outputNum == 1);
1923 
1924  // If a bias is used, its size must equal the number of output channels.
1925  BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1926 
1927 
1928  // Note these tensors will use two (identical) batches.
1929  armnn::TensorInfo inputTensorInfo =
1930  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1931  armnn::TensorInfo outputTensorInfo =
1932  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1933 
1934  // For depthwise convolution, the kernel must always be in NCHW layout, regardless of the input and output layout.
1935  armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1936 
1937  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1938 
1939  // Set quantization parameters if the requested type is a quantized type.
1940  if(armnn::IsQuantizedType<T>())
1941  {
1942  inputTensorInfo.SetQuantizationScale(qScale);
1943  inputTensorInfo.SetQuantizationOffset(qOffset);
1944  outputTensorInfo.SetQuantizationScale(qScale);
1945  outputTensorInfo.SetQuantizationOffset(qOffset);
1946  kernelDesc.SetQuantizationScale(qScale);
1947  kernelDesc.SetQuantizationOffset(qOffset);
1948  biasDesc.SetQuantizationScale(qScale*qScale);
1949  biasDesc.SetQuantizationOffset(0);
1950  }
1951 
1952  LayerTestResult<T, 4> ret(outputTensorInfo);
1953 
1954  // Construct input data
1955  std::vector<T> input;
1956  input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
1957  std::vector<T> inputData;
1958  inputData.insert(inputData.end(), input.begin(), input.end());
1959  inputData.insert(inputData.end(), input.begin(), input.end());
1960 
1961  // At this point, permute the input data if the requested layout requires it.
1962  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1963  if (layout == armnn::DataLayout::NHWC)
1964  {
1965  std::vector<T> tmp(inputData.size());
1966  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1967  inputData = tmp;
1968  }
1969 
1970  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1971 
1972  std::vector<T> output;
1973  output.assign(originalOutputExpected.data(),
1974  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
1975 
1976  // Apply bias to output data if it is enabled.
1977  if(biasEnabled)
1978  {
1979  std::vector<T> biasV;
1980  biasV.assign(bias.data(), bias.data() + outputChannels);
1981  ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1982  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1983  outputWidth, outputHeight);
1984  }
1985 
1986  // Construct expected output data
1987  std::vector<T> outputData;
1988  outputData.insert(outputData.end(), output.begin(), output.end());
1989  outputData.insert(outputData.end(), output.begin(), output.end());
1990 
1991  // At this point, permute the expected output if the requested layout requires it.
1992  if (layout == armnn::DataLayout::NHWC)
1993  {
1994  std::vector<T> tmp(outputData.size());
1995  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1996  outputData = tmp;
1997  }
1998  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1999 
2000  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2001  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2002 
2005  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2006  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2007 
2008  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2009  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2010 
2011  if(biasEnabled)
2012  {
2013  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2014  }
2015 
2016  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2017  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2018 
2019  data.m_Weight = &weightsTensor;
2020  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
2021  data.m_Parameters.m_StrideX = strideX;
2022  data.m_Parameters.m_StrideY = strideY;
2023  data.m_Parameters.m_PadLeft = padLeft;
2024  data.m_Parameters.m_PadRight = padRight;
2025  data.m_Parameters.m_PadTop = padTop;
2026  data.m_Parameters.m_PadBottom = padBottom;
2027  data.m_Parameters.m_BiasEnabled = biasEnabled;
2028  data.m_Parameters.m_DataLayout = layout;
2029  data.m_Parameters.m_DilationX = dilationX;
2030  data.m_Parameters.m_DilationY = dilationY;
2031 
2032  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2033  inputHandle->Allocate();
2034  outputHandle->Allocate();
2035 
2036  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2037 
2038  ExecuteWorkload(*workload, memoryManager);
2039 
2040  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2041 
2042  return ret;
2043 }
2044 
2045 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2046  typename T = armnn::ResolveType<ArmnnType>>
2048  armnn::IWorkloadFactory& workloadFactory,
2050  float qScale,
2051  int32_t qOffset,
2052  bool biasEnabled,
2053  const armnn::DataLayout layout)
2054 {
2055  // Use a single-batch 2-channel 5x5 image as input.
2056  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2057  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2058  QuantizedVector<T>({
2059  0, 1, 2, 3, 4,
2060  5, 6, 7, 8, 9,
2061  10, 11, 12, 13, 14,
2062  15, 16, 17, 18, 19,
2063  20, 21, 22, 23, 24,
2064 
2065  25, 26, 27, 28, 29,
2066  30, 31, 32, 33, 34,
2067  35, 36, 37, 38, 39,
2068  40, 41, 42, 43, 44,
2069  45, 46, 47, 48, 49
2070  },
2071  inputTensorInfo.GetQuantizationScale(),
2072  inputTensorInfo.GetQuantizationOffset())));
2073 
2074  // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2075  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2076  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2077  QuantizedVector<T>({
2078  32, 31, 30, 29,
2079  28, 27, 26, 25,
2080  24, 23, 22, 21,
2081  20, 19, 18, 17,
2082 
2083  16, 15, 14, 13,
2084  12, 11, 10, 9,
2085  8, 7, 6, 5,
2086  4, 3, 2, 1
2087  },
2088  kernelTensorInfo.GetQuantizationScale(),
2089  kernelTensorInfo.GetQuantizationOffset())));
2090 
2091  // Expected output is 1 batch of a 2-channel 5x5 image.
2092  // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2093  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2094  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2095  QuantizedVector<T>({
2096  1062, 1580, 1850, 1530, 1117,
2097  2140, 3108, 3500, 2842, 2042,
2098  3580, 5068, 5460, 4342, 3062,
2099  3618, 5072, 5390, 4248, 2971,
2100  3074, 4282, 4510, 3533, 2457,
2101 
2102  1550, 2284, 2362, 1955, 1428,
2103  2910, 4206, 4342, 3528, 2536,
2104  3390, 4886, 5022, 4068, 2916,
2105  3566, 5056, 5182, 4133, 2922,
2106  3100, 4352, 4452, 3517, 2465
2107  },
2108  outputTensorInfo.GetQuantizationScale(),
2109  outputTensorInfo.GetQuantizationOffset())));
2110 
2111  return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2112  workloadFactory,
2113  memoryManager,
2114  input,
2115  kernel,
2116  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2117  expectedOutput,
2118  qScale,
2119  qOffset,
2120  layout,
2121  1, // Padding left.
2122  1, // Padding top.
2123  2, // Padding right.
2124  2, // Padding bottom.
2125  1, // strideX
2126  1); // strideY
2127 }
2128 
2129 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2130  typename T = armnn::ResolveType<ArmnnType>>
2132  armnn::IWorkloadFactory& workloadFactory,
2134  float qScale,
2135  int32_t qOffset,
2136  bool biasEnabled)
2137 {
2138  auto layout = armnn::DataLayout::NHWC;
2139 
2140  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2141  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2142  QuantizedVector<T>({
2143  0, 1, 2, 3, 4,
2144  5, 6, 7, 8, 9,
2145  10, 11, 12, 13, 14,
2146  15, 16, 17, 18, 19,
2147  20, 21, 22, 23, 24,
2148 
2149  25, 26, 27, 28, 29,
2150  30, 31, 32, 33, 34,
2151  35, 36, 37, 38, 39,
2152  40, 41, 42, 43, 44,
2153  45, 46, 47, 48, 49
2154  },
2155  inputTensorInfo.GetQuantizationScale(),
2156  inputTensorInfo.GetQuantizationOffset())));
2157 
2158  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2159  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2160  QuantizedVector<T>({
2161  32, 31, 30, 29,
2162  28, 27, 26, 25,
2163  24, 23, 22, 21,
2164  20, 19, 18, 17,
2165 
2166  16, 15, 14, 13,
2167  12, 11, 10, 9,
2168  8, 7, 6, 5,
2169  4, 3, 2, 1
2170  },
2171  kernelTensorInfo.GetQuantizationScale(),
2172  kernelTensorInfo.GetQuantizationOffset())));
2173 
2174  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2175  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2176  QuantizedVector<T>({
2177  1062, 1580, 1850, 1530, 1117,
2178  2140, 3108, 3500, 2842, 2042,
2179  3580, 5068, 5460, 4342, 3062,
2180  3618, 5072, 5390, 4248, 2971,
2181  3074, 4282, 4510, 3533, 2457,
2182 
2183  1550, 2284, 2362, 1955, 1428,
2184  2910, 4206, 4342, 3528, 2536,
2185  3390, 4886, 5022, 4068, 2916,
2186  3566, 5056, 5182, 4133, 2922,
2187  3100, 4352, 4452, 3517, 2465
2188  },
2189  outputTensorInfo.GetQuantizationScale(),
2190  outputTensorInfo.GetQuantizationOffset())));
2191 
2192  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2193  workloadFactory,
2194  memoryManager,
2195  input,
2196  kernel,
2197  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2198  expectedOutput,
2199  qScale,
2200  qOffset,
2201  layout,
2202  1, // Padding left.
2203  1, // Padding top.
2204  2, // Padding right.
2205  2, // Padding bottom.
2206  1, // strideX
2207  1); // strideY
2208 }
2209 
2210 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2211  typename T = armnn::ResolveType<ArmnnType>>
2213  armnn::IWorkloadFactory& workloadFactory,
2215  float qScale,
2216  int32_t qOffset,
2217  bool biasEnabled)
2218 {
2219  auto layout = armnn::DataLayout::NHWC;
2220 
2221  armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2222  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2223  QuantizedVector<T>({
2224  0, 0, 0, 0, 0, 0, 0, 0, 0,
2225  0, 0, 0, 0, 0, 0, 0, 0, 0,
2226  0, 0, 0, 0, 0, 0, 0, 0, 0,
2227  0, 0, 0, 1, 1, 1, 0, 0, 0,
2228  0, 0, 0, 1, 1, 1, 0, 0, 0,
2229  0, 0, 0, 1, 1, 1, 0, 0, 0,
2230  0, 0, 0, 0, 0, 0, 0, 0, 0,
2231  0, 0, 0, 0, 0, 0, 0, 0, 0,
2232  0, 0, 0, 0, 0, 0, 0, 0, 0
2233  },
2234  inputTensorInfo.GetQuantizationScale(),
2235  inputTensorInfo.GetQuantizationOffset())));
2236 
2237  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2238  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2239  QuantizedVector<T>({
2240  1, 2, 3,
2241  4, 5, 6,
2242  7, 8, 9
2243  },
2244  kernelTensorInfo.GetQuantizationScale(),
2245  kernelTensorInfo.GetQuantizationOffset())));
2246 
2247  uint32_t padLeft = 0;
2248  uint32_t padTop = 0;
2249  uint32_t padRight = 0;
2250  uint32_t padBottom = 0;
2251  uint32_t strideX = 1;
2252  uint32_t strideY = 1;
2253  uint32_t dilationX = 3;
2254  uint32_t dilationY = 3;
2255 
2256  // Since the dilation rate is 3, the 9x9 input produces a 3x3 output consisting entirely of 5s.
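      // Effective kernel size with dilation: (K - 1) * D + 1 = (3 - 1) * 3 + 1 = 7, giving an output of
      // (9 - 7) / 1 + 1 = 3 in each spatial dimension.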
2257  armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2258  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2259  QuantizedVector<T>({
2260  5, 5, 5,
2261  5, 5, 5,
2262  5, 5, 5
2263  },
2264  outputTensorInfo.GetQuantizationScale(),
2265  outputTensorInfo.GetQuantizationOffset())));
2266 
2267  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2268  workloadFactory,
2269  memoryManager,
2270  input,
2271  kernel,
2272  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2273  expectedOutput,
2274  qScale,
2275  qOffset,
2276  layout,
2277  padLeft,
2278  padTop,
2279  padRight,
2280  padBottom,
2281  strideX,
2282  strideY,
2283  dilationX,
2284  dilationY);
2285 }
2286 
2287 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2289  armnn::IWorkloadFactory& workloadFactory,
2291  const std::vector<float>& inputNoQuantizedValues,
2292  armnn::TensorInfo& inputTensorInfo,
2293  const std::vector<float>& kernelNoQuantizedValues,
2294  armnn::TensorInfo& kernelTensorInfo,
2295  const std::vector<float>& outputExpectedNoQuantizedValues,
2296  armnn::TensorInfo& outputTensorInfo,
2297  uint32_t dilationX,
2298  uint32_t dilationY,
2300  bool biasEnabled = false)
2301 {
2302  float qScale;
2303  int32_t qOffset;
2304  switch (ArmnnType)
2305  {
2307  {
2308  qScale = 0.1f;
2309  qOffset = 128;
2310  break;
2311  }
2313  {
2314  qScale = 0.1f;
2315  qOffset = 0;
2316  break;
2317  }
2319  default:
2320  {
2321  qScale = 0.f;
2322  qOffset = 0;
2323  break;
2324  }
2325  }
2326 
2327  inputTensorInfo.SetQuantizationScale(qScale);
2328  inputTensorInfo.SetQuantizationOffset(qOffset);
2329  kernelTensorInfo.SetQuantizationScale(qScale);
2330  kernelTensorInfo.SetQuantizationOffset(qOffset);
2331  outputTensorInfo.SetQuantizationScale(qScale);
2332  outputTensorInfo.SetQuantizationOffset(qOffset);
2333 
2334  auto input = MakeTensor<T, 4>(inputTensorInfo,
2335  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2336  inputTensorInfo.GetQuantizationScale(),
2337  inputTensorInfo.GetQuantizationOffset())));
2338  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2339  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2340  kernelTensorInfo.GetQuantizationScale(),
2341  kernelTensorInfo.GetQuantizationOffset())));
2342  auto expectedOutput =
2343  MakeTensor<T, 4>(outputTensorInfo,
2344  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2345  outputTensorInfo.GetQuantizationScale(),
2346  outputTensorInfo.GetQuantizationOffset())));
2347 
2348  uint32_t padLeft = 0;
2349  uint32_t padTop = 0;
2350  uint32_t padRight = 0;
2351  uint32_t padBottom = 0;
2352  uint32_t strideX = 1;
2353  uint32_t strideY = 1;
2354 
2355  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2356  workloadFactory,
2357  memoryManager,
2358  input,
2359  kernel,
2360  GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2361  expectedOutput,
2362  qScale,
2363  qOffset,
2364  layout,
2365  padLeft,
2366  padTop,
2367  padRight,
2368  padBottom,
2369  strideX,
2370  strideY,
2371  dilationX,
2372  dilationY);
2373 }
2374 
2375 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2377  armnn::IWorkloadFactory& workloadFactory,
2379  bool biasEnabled,
2380  const armnn::DataLayout layout)
2381 {
2382  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2383  std::vector<float> inputNoQuantizedValues =
2384  {
2385  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2386  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2387  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2388  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2389  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2390  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2391  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2392  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2393  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2394  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2395  };
2396 
2397  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2398  std::vector<float> kernelNoQuantizedValues =
2399  {
2400  1, 2, 3,
2401  4, 5, 6,
2402  7, 8, 9
2403  };
2404 
2405  // Since the dilation rate is 3, the 3x3 kernel is effectively dilated to 7x7 ((K - 1) * D + 1 = 7),
2406  // so the output will be 4x4: (I - K + 2P) / S + 1 => (10 - 7 + 0) / 1 + 1 = 4.
2407  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2408  std::vector<float> outputExpectedNoQuantizedValues =
2409  {
2410  6., 5., 5., 5.,
2411  6., 5., 5., 5.,
2412  6., 5., 5., 5.,
2413  3., 2., 2., 2.
2414  };
2415 
2416  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2417  workloadFactory,
2418  memoryManager,
2419  inputNoQuantizedValues,
2420  inputTensorInfo,
2421  kernelNoQuantizedValues,
2422  kernelTensorInfo,
2423  outputExpectedNoQuantizedValues,
2424  outputTensorInfo,
2425  3,
2426  3,
2427  layout,
2428  biasEnabled);
2429 }
2430 
2431 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2433  armnn::IWorkloadFactory& workloadFactory,
2435  bool biasEnabled,
2436  const armnn::DataLayout layout)
2437 {
2438  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2439  std::vector<float> inputNoQuantizedValues =
2440  {
2441  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2442  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2443  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2445  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2446  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2447  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2448  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2449  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2450  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2451 
2452  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2453  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2454  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2455  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2456  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2457  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2458  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2459  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2460  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2461  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2462  };
2463 
2464  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2465  std::vector<float> kernelNoQuantizedValues =
2466  {
2467  1, 2, 3,
2468  4, 5, 6,
2469  7, 8, 9,
2470 
2471  1, 2, 3,
2472  4, 5, 6,
2473  7, 8, 9
2474  };
2475 
2476  // Since the dilation rate is 3, the 3x3 kernel is effectively dilated to 7x7 ((K - 1) * D + 1 = 7),
2477  // so the output will be 2x4x4: (I - K + 2P) / S + 1 => (10 - 7 + 0) / 1 + 1 = 4 per channel.
2478  armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2479  std::vector<float> outputExpectedNoQuantizedValues =
2480  {
2481  6., 5., 5., 5.,
2482  6., 5., 5., 5.,
2483  6., 5., 5., 5.,
2484  3., 2., 2., 2.,
2485 
2486  6., 5., 5., 5.,
2487  6., 5., 5., 5.,
2488  6., 5., 5., 5.,
2489  3., 2., 2., 2.
2490  };
2491 
2492  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2493  workloadFactory,
2494  memoryManager,
2495  inputNoQuantizedValues,
2496  inputTensorInfo,
2497  kernelNoQuantizedValues,
2498  kernelTensorInfo,
2499  outputExpectedNoQuantizedValues,
2500  outputTensorInfo,
2501  3,
2502  3,
2503  layout,
2504  biasEnabled);
2505 }
2506 
2507 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2509  armnn::IWorkloadFactory& workloadFactory,
2511  bool biasEnabled,
2512  const armnn::DataLayout layout)
2513 {
2514  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2515  std::vector<float> inputNoQuantizedValues =
2516  {
2517  10.0, 10.0, 10.0,
2518  10.0, 10.0, 10.0,
2519  10.0, 10.0, 10.0,
2520 
2521  21.0, 22.0, 23.0,
2522  24.0, 25.0, 26.0,
2523  27.0, 28.0, 29.0
2524  };
2525 
2526  armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
2527 
2528  std::vector<float> kernelNoQuantizedValues =
2529  {
2530  0.25f, 0.25f,
2531  0.25f, 0.25f,
2532 
2533  0.25f, 0.25f,
2534  0.25f, 0.25f,
2535 
2536  0.0f , 0.0f,
2537  0.0f , 0.1f,
2538 
2539  0.0f , 0.0f,
2540  0.0f , 0.1f,
2541 
2542  0.2f , 0.0f,
2543  0.0f , 0.0f,
2544 
2545  0.2f , 0.0f,
2546  0.0f , 0.0f,
2547 
2548  0.0f , 0.3f,
2549  0.0f , 0.0f,
2550 
2551  0.0f , 0.3f,
2552  0.0f , 0.0f
2553  };
2554 
2555  armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2556  std::vector<float> outputExpectedNoQuantizedValues =
2557  {
2558  10.f, 10.f,
2559  10.f, 10.f,
2560 
2561  1.f, 1.f,
2562  1.f, 1.f,
2563 
2564  2.f, 2.f,
2565  2.f, 2.f,
2566 
2567  3.f, 3.f,
2568  3.f, 3.f,
2569 
2570  23.f, 24.f,
2571  26.f, 27.f,
2572 
2573  2.5f, 2.6000001f,
2574  2.8f, 2.9f,
2575 
2576  4.2000003f, 4.4f,
2577  4.8f, 5.f,
2578 
2579  6.6000004f, 6.9f,
2580  7.5000005f, 7.8f
2581  };
2582 
2583 
2584  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2585  workloadFactory,
2586  memoryManager,
2587  inputNoQuantizedValues,
2588  inputTensorInfo,
2589  kernelNoQuantizedValues,
2590  kernelTensorInfo,
2591  outputExpectedNoQuantizedValues,
2592  outputTensorInfo,
2593  1,
2594  1,
2595  layout,
2596  biasEnabled);
2597 }
2598 
2599 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2601  armnn::IWorkloadFactory& workloadFactory,
2603  bool biasEnabled,
2604  const armnn::DataLayout layout)
2605 {
2606  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2607  std::vector<float> inputNoQuantizedValues =
2608  {
2609  10.0, 10.0, 10.0,
2610  10.0, 10.0, 10.0,
2611  10.0, 10.0, 10.0,
2612 
2613  21.0, 22.0, 23.0,
2614  24.0, 25.0, 26.0,
2615  27.0, 28.0, 29.0
2616  };
2617 
2618  armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2619 
2620  std::vector<float> kernelNoQuantizedValues =
2621  {
2622  0.25f, 0.25f,
2623  0.25f, 0.25f,
2624 
2625  0.2f , 0.0f,
2626  0.0f , 0.0f,
2627 
2628  0.0f , 0.0f,
2629  0.0f , 0.1f,
2630 
2631  0.0f , 0.3f,
2632  0.0f , 0.0f
2633 
2634  };
2635 
2636  armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2637  std::vector<float> outputExpectedNoQuantizedValues =
2638  {
2639  10.f, 10.f,
2640  10.f, 10.f,
2641 
2642  1.f, 1.f,
2643  1.f, 1.f,
2644 
2645  4.2000003f, 4.4f,
2646  4.8f, 5.f,
2647 
2648  6.6000004f, 6.9f,
2649  7.5000005f, 7.8f
2650  };
2651 
2652 
2653  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2654  workloadFactory,
2655  memoryManager,
2656  inputNoQuantizedValues,
2657  inputTensorInfo,
2658  kernelNoQuantizedValues,
2659  kernelTensorInfo,
2660  outputExpectedNoQuantizedValues,
2661  outputTensorInfo,
2662  1,
2663  1,
2664  layout,
2665  biasEnabled);
2666 }
2667 
2668 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2670  armnn::IWorkloadFactory& workloadFactory,
2672  armnn::IWorkloadFactory& refWorkloadFactory,
2673  const armnnUtils::DataLayoutIndexed& layout)
2674 {
2675  unsigned int inputHeight = 8;
2676  unsigned int inputWidth = 16;
2677  unsigned int inputChannels = 3;
2678  unsigned int inputNum = 5;
2679 
2680  unsigned int kernelHeight = 3;
2681  unsigned int kernelWidth = 3;
2682  unsigned int channelMultiplier = 1;
2683 
2684  unsigned int strideX = 2;
2685  unsigned int strideY = 3;
2686  unsigned int padX = 1;
2687  unsigned int padY = 1;
2688 
2689  unsigned int outputNum = inputNum;
2690  unsigned int outputChannels = inputChannels * channelMultiplier;
2691  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2692  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
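      // With the values above this gives outputHeight = (8 + 2 - 3 + 3) / 3 = 3 and
      // outputWidth = (16 + 2 - 3 + 2) / 2 = 8 (integer division).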
2693 
2694  armnn::TensorInfo inputTensorInfo;
2695  armnn::TensorInfo outputTensorInfo;
2696  armnn::TensorInfo kernelDesc;
2697  armnn::TensorInfo biasDesc;
2698 
2699 
2700  std::vector<unsigned int> inputShape;
2701  std::vector<unsigned int> outputShape;
2702  std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2703  std::vector<unsigned int> biasShape{ outputChannels };
2704  switch (layout.GetDataLayout())
2705  {
2707  inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2708  outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2709  break;
2711  inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2712  outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2713  break;
2714  default:
2715  throw armnn::InvalidArgumentException("unknown data layout ["
2716  + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2717  }
2718 
2719  float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2720  float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2721  int32_t qOffset = 0;
2722 
2723  inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2724  outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2725  kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2726  biasDesc = armnn::TensorInfo(
2727  1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2728 
2729  LayerTestResult<T, 4> ret(outputTensorInfo);
2730 
2731  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2732  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2733  auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2734  biasDesc, 1028, 0.0f, 255.0f);
2735 
2736  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2737  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2738 
2741  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2742  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2743 
2744  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2745  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2746 
2747  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2748  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2749  data.m_Weight = &weightsTensor;
2750  data.m_Bias = &biasTensor;
2751  data.m_Parameters.m_StrideX = strideX;
2752  data.m_Parameters.m_StrideY = strideY;
2753  data.m_Parameters.m_PadLeft = padX;
2754  data.m_Parameters.m_PadRight = padX;
2755  data.m_Parameters.m_PadTop = padY;
2756  data.m_Parameters.m_PadBottom = padY;
2757  data.m_Parameters.m_BiasEnabled = true;
2758  data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2759 
2760  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2761  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2762 
2764  armnn::WorkloadInfo refInfo = info;
2765  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2766  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2767 
2768  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2769  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2770 
2771  outputHandleRef->Allocate();
2772  inputHandleRef->Allocate();
2773 
2774  inputHandle->Allocate();
2775  outputHandle->Allocate();
2776 
2777  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2778  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2779 
2780  ExecuteWorkload(*workload, memoryManager);
2781 
2782  workloadRef->PostAllocationConfigure();
2783  workloadRef->Execute();
2784 
2785  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2786  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
2787 
2788  return ret;
2789 }
2790 
2791 //
2792 // Explicit template specializations
2793 //
2794 
2796 Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2799  bool,
2801 
2803 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2804  armnn::IWorkloadFactory&,
2805  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2806  bool,
2808 
2810 Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2811  armnn::IWorkloadFactory&,
2812  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2813  bool,
2815 
2817 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2818  armnn::IWorkloadFactory&,
2819  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2820  bool,
2822 
2823 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2824 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2825  armnn::IWorkloadFactory&,
2826  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2827  bool,
2829 
2830 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2831 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2832  armnn::IWorkloadFactory&,
2833  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2834  bool,
2836 
2837 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2838 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2839  armnn::IWorkloadFactory &workloadFactory,
2840  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2841  bool biasEnabled,
2842  const armnn::DataLayout layout);
2843 
2844 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2845 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2846  armnn::IWorkloadFactory &workloadFactory,
2847  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2848  bool biasEnabled,
2849  const armnn::DataLayout layout);
2850 
2851 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2852 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2853  armnn::IWorkloadFactory &workloadFactory,
2854  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2855  bool biasEnabled,
2856  const armnn::DataLayout layout);
2857 
2858 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2859 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2860  armnn::IWorkloadFactory&,
2861  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2862  bool,
2864 
2865 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2866 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2867  armnn::IWorkloadFactory&,
2868  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2869  bool,
2871 
2872 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2873 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2874  armnn::IWorkloadFactory&,
2875  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2876  bool,
2878 
2879 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2880 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2881  armnn::IWorkloadFactory&,
2882  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2883  bool,
2885 
2886 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2887 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2888  armnn::IWorkloadFactory&,
2889  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2890  bool,
2892 
2893 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2894 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2895  armnn::IWorkloadFactory&,
2896  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2897  bool,
2899 
2900 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2901 DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2902  armnn::IWorkloadFactory &workloadFactory,
2903  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2904  bool biasEnabled,
2905  const armnn::DataLayout layout);
2906 
2907 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2908 DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2909  armnn::IWorkloadFactory &workloadFactory,
2910  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2911  bool biasEnabled,
2912  const armnn::DataLayout layout);
2913 
2914 //
2915 // Implementation functions
2916 //
2917 
2919  armnn::IWorkloadFactory& workloadFactory,
2920  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2921  bool biasEnabled,
2922  const armnn::DataLayout layout)
2923 {
2924  return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
2925  workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
2926 }
2927 
2929  armnn::IWorkloadFactory& workloadFactory,
2930  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2931  bool biasEnabled,
2932  const armnn::DataLayout layout)
2933 {
2934  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2935  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2936 }
2937 
2939  armnn::IWorkloadFactory& workloadFactory,
2940  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2941  bool biasEnabled,
2942  const armnn::DataLayout layout)
2943 {
2944  return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
2945  workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
2946 }
2947 
2949  armnn::IWorkloadFactory& workloadFactory,
2950  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2951  bool biasEnabled)
2952 {
2953  return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
2954  workloadFactory,
2955  memoryManager,
2956  0.f,
2957  0,
2958  biasEnabled,
2960 }
2961 
2963  armnn::IWorkloadFactory& workloadFactory,
2964  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2965  bool biasEnabled,
2966  const armnn::DataLayout layout)
2967 {
2968  return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
2969  workloadFactory,
2970  memoryManager,
2971  0.f,
2972  0,
2973  biasEnabled,
2974  layout);
2975 }
2976 
2978  armnn::IWorkloadFactory& workloadFactory,
2979  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2980  bool biasEnabled,
2981  const armnn::DataLayout layout)
2982 {
2983  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2984  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2985 }
2986 
2988  armnn::IWorkloadFactory& workloadFactory,
2989  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2990  bool biasEnabled,
2991  const armnn::DataLayout layout)
2992 {
2993  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2994  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2995 }
2996 
2998  armnn::IWorkloadFactory& workloadFactory,
2999  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3000  bool biasEnabled,
3001  const armnn::DataLayout layout)
3002 {
3003  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3004  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3005 }
3006 
3008  armnn::IWorkloadFactory& workloadFactory,
3009  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3010  armnn::DataLayout layout)
3011 {
3012  return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3013  workloadFactory, memoryManager, layout, 0.0f, 0);
3014 }
3015 
3017  armnn::IWorkloadFactory& workloadFactory,
3018  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3019  armnn::DataLayout layout)
3020 {
3022  <armnn::DataType::Float32, armnn::DataType::Float32>(
3023  workloadFactory, memoryManager, layout, 0.0f, 0);
3024 }
3025 
3027  armnn::IWorkloadFactory& workloadFactory,
3028  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3029  bool biasEnabled)
3030 {
3031  return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3032  workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
3033 }
3034 
3036  armnn::IWorkloadFactory& workloadFactory,
3037  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3038  bool biasEnabled)
3039 {
3040  return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3041  workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
3042 }
3043 
3045  armnn::IWorkloadFactory& workloadFactory,
3046  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3047  const armnn::DataLayout layout)
3048 {
3049  using namespace armnn;
3050 
3051  const DataType inputType = DataType::QAsymmU8;
3052  const DataType kernelType = DataType::QSymmS8;
3053  const DataType biasType = DataType::Signed32;
3054 
3055  TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
3056  TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
3057 
3058  const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
3059  constexpr unsigned int quantDimension = 0;
3060 
3061  TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
3062 
3063  const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
3064  TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
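      // Each per-axis bias scale is the input scale multiplied by the matching kernel scale:
      // 0.5 * { 0.5, 0.75, 1.0 } = { 0.25, 0.375, 0.5 }.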
3065 
3066  std::vector<uint8_t> inputData =
3067  {
3068  138, 108, 138, 108, 138, 108
3069  };
3070 
3071  std::vector<int8_t> kernelData =
3072  {
3073  1, 2, 1, 2, 1, 2
3074  };
3075 
3076  std::vector<int32_t> biasData =
3077  {
3078  4, 4, 4
3079  };
3080 
3081  std::vector<uint8_t> expectedOutputData =
3082  {
3083  121, 118, 115, 121, 118, 115, 121, 118, 115
3084  };
3085 
3086  if (layout == DataLayout::NCHW)
3087  {
3088  PermuteTensorNhwcToNchw(inputInfo, inputData);
3089  PermuteTensorNhwcToNchw(kernelInfo, kernelData);
3090  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3091  }
3092 
3093  Convolution2dDescriptor descriptor;
3094  descriptor.m_StrideX = 1;
3095  descriptor.m_StrideY = 1;
3096  descriptor.m_PadLeft = 0;
3097  descriptor.m_PadRight = 0;
3098  descriptor.m_PadTop = 0;
3099  descriptor.m_PadBottom = 0;
3100  descriptor.m_BiasEnabled = true;
3101  descriptor.m_DataLayout = layout;
3102 
3103  std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
3104  std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
3105 
3106  WorkloadInfo workloadInfo;
3107  ScopedCpuTensorHandle weightTensor(kernelInfo);
3108  ScopedCpuTensorHandle biasTensor(biasInfo);
3109 
3110  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3111  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3112 
3113  Convolution2dQueueDescriptor queueDescriptor;
3114  queueDescriptor.m_Parameters = descriptor;
3115  queueDescriptor.m_Weight = &weightTensor;
3116  queueDescriptor.m_Bias = &biasTensor;
3117 
3118  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3119  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3120 
3121  std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
3122  inputHandle->Allocate();
3123  outputHandle->Allocate();
3124 
3125  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3126 
3127  ExecuteWorkload(*workload, memoryManager);
3128 
3129  LayerTestResult<uint8_t, 4> ret(outputInfo);
3130  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3131  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3132 
3133  return ret;
3134 }
3135 
3137  armnn::IWorkloadFactory& workloadFactory,
3138  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3139  armnn::IWorkloadFactory& refWorkloadFactory)
3140 {
3141  return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
3142  workloadFactory, memoryManager, refWorkloadFactory);
3143 }
3144 
3146  armnn::IWorkloadFactory& workloadFactory,
3147  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3148  bool biasEnabled,
3149  const armnn::DataLayout layout)
3150 {
3151  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3152  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3153 }
3154 
3156  armnn::IWorkloadFactory& workloadFactory,
3157  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3158  bool biasEnabled)
3159 {
3160  return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3161  workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
3162 }
3163 
3165  armnn::IWorkloadFactory& workloadFactory,
3166  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3167  bool biasEnabled,
3168  const armnn::DataLayout layout)
3169 {
3170  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3171  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3172 }
3173 
3175  armnn::IWorkloadFactory& workloadFactory,
3176  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3177 {
3178  armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
3179  auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
3180 
3181  std::vector<float> kernelData;
3182  std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
3183  for (unsigned int i = 0; i < 64; ++i)
3184  {
3185  kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
3186  }
3187  armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
3188  auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
3189 
3190  std::vector<float> expectedOutputData(64, 0.f);
3191  armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
3192  auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
3193 
3194  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3195  workloadFactory,
3196  memoryManager,
3197  input,
3198  kernel,
3199  boost::multi_array<float, 1>(),
3200  expectedOutput,
3201  0.f,
3202  0,
3204 }
3205 
3207  armnn::IWorkloadFactory& workloadFactory,
3208  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3209  bool biasEnabled,
3210  const armnn::DataLayout layout)
3211 {
3212  return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3213  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3214 }
3215 
3217  armnn::IWorkloadFactory& workloadFactory,
3218  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3219  bool biasEnabled,
3220  const armnn::DataLayout layout)
3221 {
3222  return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3223  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3224 }
3225 
3226 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
 3227  armnn::IWorkloadFactory& workloadFactory,
3228  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3229  bool biasEnabled,
3230  const armnn::DataLayout layout)
3231 {
3232  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3233  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3234 }
3235 
3236 LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
 3237  armnn::IWorkloadFactory& workloadFactory,
3238  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3239 {
3240  return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3241  workloadFactory,
3242  memoryManager,
3243  0.f,
3244  0,
3245  false);
3246 }
3247 
3248 LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
 3249  armnn::IWorkloadFactory& workloadFactory,
3250  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3251  bool biasEnabled,
3252  const armnn::DataLayout layout)
3253 {
3254  return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3255  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3256 }
3257 
3258 LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
 3259  armnn::IWorkloadFactory& workloadFactory,
3260  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3261  bool biasEnabled,
3262  const armnn::DataLayout layout)
3263 {
3264  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3265  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3266 }
3267 
3268 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
 3269  armnn::IWorkloadFactory& workloadFactory,
3270  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3271  const armnn::DataLayout layout)
3272 {
3273  using namespace armnn;
3274 
3275  const DataType inputType = DataType::QAsymmU8;
3276  const DataType kernelType = DataType::QSymmS8;
3277  const DataType biasType = DataType::Signed32;
3278 
3279  TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
3280  TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
3281 
3282  const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
3283  const unsigned int quantDimension = 0;
3284  TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
3285 
3286  const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
3287  constexpr unsigned int biasQuantDimension = 0;
3288  TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
3289 
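    // The weights are quantized per output channel (quantDimension 0), and each bias scale is
    // the product of the input scale (0.5) and the matching weight scale: { 0.5, 0.25, 0.5, 0.25 }.
    // The input values 129/130 dequantize to 0.5 (channel 0) and 1.0 (channel 1).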
3290  std::vector<uint8_t> inputData =
3291  {
3292  129, 130,
3293  129, 130,
3294  129, 130,
3295  129, 130,
3296  129, 130,
3297  129, 130,
3298  129, 130,
3299  129, 130,
3300  129, 130
3301  };
3302 
3303  std::vector<int8_t> kernelData =
3304  {
3305  1, 1, 1, 1,
3306  1, 1, 1, 1,
3307  1, 1, 1, 1,
3308  1, 1, 1, 1
3309  };
3310 
3311  std::vector<int32_t> biasData =
3312  {
3313  4, 4, 4, 4
3314  };
3315 
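    // Derivation of the expected values: after dequantization each 2x2 all-ones window sums to
    // 2.0, 1.0, 4.0 and 2.0 for the four output channels, the biases dequantize to { 2, 1, 2, 1 },
    // giving real outputs { 4, 2, 6, 3 }, which requantize (scale 1.0, offset 128) to
    // { 132, 130, 134, 131 }.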
3316  std::vector<uint8_t> expectedOutputData =
3317  {
3318  132, 130, 134, 131,
3319  132, 130, 134, 131,
3320  132, 130, 134, 131,
3321  132, 130, 134, 131
3322  };
3323 
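    // The literal data above is laid out NHWC; permute both the input and the expected
    // output in place when the requested layout is NCHW.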
3324  if (layout == DataLayout::NCHW)
3325  {
3326  PermuteTensorNhwcToNchw(inputInfo, inputData);
3327  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3328  }
3329 
3330  DepthwiseConvolution2dDescriptor descriptor;
 3331  descriptor.m_StrideX = 1;
3332  descriptor.m_StrideY = 1;
3333  descriptor.m_PadLeft = 0;
3334  descriptor.m_PadRight = 0;
3335  descriptor.m_PadTop = 0;
3336  descriptor.m_PadBottom = 0;
3337  descriptor.m_DilationX = 1;
3338  descriptor.m_DilationY = 1;
3339  descriptor.m_BiasEnabled = true;
3340  descriptor.m_DataLayout = layout;
3341 
3342  std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
3343  std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
3344 
3345  WorkloadInfo workloadInfo;
3346  ScopedCpuTensorHandle weightTensor(kernelInfo);
3347  ScopedCpuTensorHandle biasTensor(biasInfo);
3348 
3349  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3350  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3351 
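    // Package the descriptor together with the constant weight and bias handles into the
    // queue descriptor from which the backend workload is created.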
3352  DepthwiseConvolution2dQueueDescriptor queueDescriptor;
3353  queueDescriptor.m_Parameters = descriptor;
3354  queueDescriptor.m_Weight = &weightTensor;
3355  queueDescriptor.m_Bias = &biasTensor;
3356 
3357  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3358  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3359 
3360  std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
3361  inputHandle->Allocate();
3362  outputHandle->Allocate();
3363 
3364  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3365 
3366  ExecuteWorkload(*workload, memoryManager);
3367 
3368  LayerTestResult<uint8_t, 4> ret(outputInfo);
3369 
3370  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3371  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3372 
3373  return ret;
3374 }
3375 
3376 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
 3377  armnn::IWorkloadFactory& workloadFactory,
3378  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3379  armnn::IWorkloadFactory& refWorkloadFactory,
3380  const armnn::DataLayout layout)
3381 {
3382  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
3383  workloadFactory, memoryManager, refWorkloadFactory, layout);
3384 }
3385 
3386 LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
 3387  armnn::IWorkloadFactory& workloadFactory,
3388  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3389  armnn::IWorkloadFactory& refWorkloadFactory,
3390  const armnn::DataLayout layout)
3391 {
3392  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
3393  workloadFactory, memoryManager, refWorkloadFactory, layout);
3394 }