ArmNN 20.02: Conv2dTestImpl.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "Conv2dTestImpl.hpp"
7 
8 #include <QuantizeHelper.hpp>
9 #include <armnnUtils/TensorUtils.hpp>
10 
11 #include <armnn/utility/IgnoreUnused.hpp>
12 #include <armnnUtils/DataLayoutIndexed.hpp>
13 #include <armnnUtils/Permute.hpp>
14 
15 #include <backendsCommon/CpuTensorHandle.hpp>
16 
17 #include <backendsCommon/test/TensorCopyUtils.hpp>
18 #include <backendsCommon/test/WorkloadTestUtils.hpp>
19 
20 
21 #include <test/TensorHelpers.hpp>
22 
23 #include <boost/numeric/conversion/cast.hpp>
24 
25 #include <string>
26 
27 //
28 // Static data
29 //
30 
31 // 2-channel bias used by a number of Conv2d tests.
32 static std::vector<float> Bias2({0, 2});
33 
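// Biases with 4 and 8 channels, used by tests whose output has that many channels (selected via GetBias below).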
34 static std::vector<float> Bias4({1, 2, 3, 4});
35 
36 static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
37 
38 // 3-channel 16x8 image used as common input data for a number of Conv2d tests.
39 static std::vector<float> ConvInput3x8x16({
40  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
41  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
42  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
43  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
44  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
45  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
46  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
47  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
48  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
57  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
58  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
64 });
65 
66 using namespace armnnUtils;
67 
68 //
69 // Helper templates
70 //
71 
72 // Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
73 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
74 boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
75 {
76  if(biasEnabled)
77  {
78  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
79  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0.0f));
80  return bias;
81  }
82  else
83  {
84  return boost::multi_array<T, 1>();
85  }
86 }
87 
88 // Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
89 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
90 boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
91 {
92  if(biasEnabled)
93  {
94  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
95  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0.0f));
96  return bias;
97  }
98  else
99  {
100  return boost::multi_array<T, 1>();
101  }
102 }
103 
104 // Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
105 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
106 boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
107 {
108  if(biasEnabled)
109  {
110  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
111  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0.0f));
112  return bias;
113  }
114  else
115  {
116  return boost::multi_array<T, 1>();
117  }
118 }
119 
120 // Helper template that returns Bias2, Bias4 or Bias8 (depending on the number of output channels),
// or an empty vector if bias is disabled.
121 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
122 boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
123 {
124  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
125  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
126  const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
127 
128  switch (outputChannels)
129  {
130  case 2:
131  default:
132  {
133  return GetBias2<ArmnnType>(biasEnabled, qScale);
134  }
135  case 4:
136  {
137  return GetBias4<ArmnnType>(biasEnabled, qScale);
138  }
139  case 8:
140  {
141  return GetBias8<ArmnnType>(biasEnabled, qScale);
142  }
143  }
144 }
145 
146 //
147 // Implementation templates
148 //
149 
150 // Mapping from input type to bias type for fully connected layers.
151 // float => float, uint8_t => int32_t
152 template<typename T>
153 struct FullyConnectedBiasTypeForInputType;
154 
155 template<>
156 struct FullyConnectedBiasTypeForInputType<float>
157 {
158  using Type = float;
159 };
160 
161 template<>
162 struct FullyConnectedBiasTypeForInputType<uint8_t>
163 {
164  using Type = int32_t;
165 };
166 
167 // Modifies a std::vector in-place using a specified bias.
168 template<typename T, typename B>
169 void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
170  const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
171 {
172  BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
173  "Invalid type and parameter combination.");
174  BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
175  "Invalid type and parameter combination.");
176 
177  // Note we need to dequantize and re-quantize the image value and the bias.
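 // The data in 'v' is treated as a single batch in CHW order: one w x h plane per bias entry,
 // and the i-th bias value is added to every element of the i-th plane.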
178  for (uint32_t i = 0; i < bias.size(); ++i)
179  {
180  float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
181  for (uint32_t y = 0; y < h; ++y)
182  {
183  for (uint32_t x = 0; x < w; ++x)
184  {
185  uint32_t offset = (i * h + y) * w + x;
186  BOOST_ASSERT(offset < v.size());
187  T& outRef = v[offset];
188  float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
189  outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
190  }
191  }
192  }
193 }
194 
195 //
196 // Convolution2d implementations
197 //
198 
199 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
200          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
201 LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
202  armnn::IWorkloadFactory& workloadFactory,
203  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
204  const boost::multi_array<T, 4>& originalInput,
205  const boost::multi_array<T, 4>& originalKernel,
206  const boost::multi_array<B, 1>& bias,
207  const boost::multi_array<T, 4>& originalOutputExpected,
208  float qScale,
209  int32_t qOffset,
210  const armnn::DataLayout layout,
211  uint32_t padLeft = 0,
212  uint32_t padTop = 0,
213  uint32_t padRight = 0,
214  uint32_t padBottom = 0,
215  uint32_t strideX = 1,
216  uint32_t strideY = 1,
217  uint32_t dilationX = 1,
218  uint32_t dilationY = 1)
219 {
220  armnn::IgnoreUnused(memoryManager);
221  unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
222  unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
223  unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
224  unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
225 
226  unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
227  unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
228  unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
229  unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
230 
231  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
232  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
233  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
234  unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
235 
236  bool biasEnabled = bias.size() > 0;
237 
238  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
239  BOOST_ASSERT(inputNum == 1);
240  BOOST_ASSERT(outputNum == 1);
241 
242  // If a bias is used, its size must equal the number of output channels.
243  BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
244 
245 
246  // Note these tensors will use two (identical) batches.
247  armnn::TensorInfo inputTensorInfo =
248  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
249  armnn::TensorInfo outputTensorInfo =
250  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
251  armnn::TensorInfo kernelDesc =
252  armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
253  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
254 
255  // Set quantization parameters if the requested type is a quantized type.
256  if(armnn::IsQuantizedType<T>())
257  {
258  inputTensorInfo.SetQuantizationScale(qScale);
259  inputTensorInfo.SetQuantizationOffset(qOffset);
260  outputTensorInfo.SetQuantizationScale(qScale);
261  outputTensorInfo.SetQuantizationOffset(qOffset);
262  kernelDesc.SetQuantizationScale(qScale);
263  kernelDesc.SetQuantizationOffset(qOffset);
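 // The bias tensor is quantized with scale = input scale * weight scale (both equal qScale here) and zero
 // offset, which is the usual convention for quantized convolution bias.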
264  biasDesc.SetQuantizationScale(qScale*qScale);
265  biasDesc.SetQuantizationOffset(0);
266  }
267 
268  LayerTestResult<T, 4> ret(outputTensorInfo);
269 
270  // Construct input data - two batches of the same input image.
271  std::vector<T> inputImage;
272  inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
273  std::vector<T> inputData;
274  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
275  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
276 
277  // At this point, permute the input data into NHWC if that layout is required.
278  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
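 // Each mapping entry gives the destination axis for the corresponding source axis
 // (N->0, C->3, H->1, W->2), so NCHW-ordered data is rearranged into NHWC order.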
279  if (layout == armnn::DataLayout::NHWC)
280  {
281  std::vector<T> tmp(inputData.size());
282  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
283  inputData = tmp;
284  }
285 
286  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
287 
288  std::vector<T> outputImage;
289  outputImage.assign(originalOutputExpected.data(),
290  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
291 
292  // Apply bias to output image if it is enabled.
293  if(biasEnabled)
294  {
295  std::vector<T> biasV;
296  biasV.assign(bias.data(), bias.data() + outputChannels);
297  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
298  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
299  outputWidth, outputHeight);
300  }
301 
302  // Construct expected output data - two identical images.
303  std::vector<T> outputData;
304  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
305  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
306 
307  // At this point, permute the expected output into NHWC if that layout is required.
308  if (layout == armnn::DataLayout::NHWC)
309  {
310  std::vector<T> tmp(outputData.size());
311  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
312  outputData = tmp;
313  }
314  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
315 
316  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
317  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
318 
319  armnn::Convolution2dQueueDescriptor data;
320  armnn::WorkloadInfo info;
321  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
322  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
323  // Permute the kernel if necessary
324  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
325  if (layout == armnn::DataLayout::NHWC)
326  {
327  armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
328  }
329  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
330 
331  if(biasEnabled)
332  {
333  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
334  }
335 
336  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
337  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
338 
339  data.m_Weight = &weightsTensor;
340  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
341  data.m_Parameters.m_StrideX = strideX;
342  data.m_Parameters.m_StrideY = strideY;
343  data.m_Parameters.m_PadLeft = padLeft;
344  data.m_Parameters.m_PadRight = padRight;
345  data.m_Parameters.m_PadTop = padTop;
346  data.m_Parameters.m_PadBottom = padBottom;
347  data.m_Parameters.m_BiasEnabled = biasEnabled;
348  data.m_Parameters.m_DataLayout = layout;
349  data.m_Parameters.m_DilationX = dilationX;
350  data.m_Parameters.m_DilationY = dilationY;
351 
352  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
353  inputHandle->Allocate();
354  outputHandle->Allocate();
355 
356  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
357 
358  ExecuteWorkload(*workload, memoryManager);
359 
360  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
361 
362  return ret;
363 }
364 
365 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
366          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
367 LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl(
368  armnn::IWorkloadFactory& workloadFactory,
369  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
370  const boost::multi_array<T, 4>& input,
371  const boost::multi_array<T, 4>& kernel,
372  const boost::multi_array<B, 1>& bias,
373  const boost::multi_array<T, 4>& outputExpected,
374  const armnn::DataLayout dataLayout,
375  float qScale,
376  int32_t qOffset,
377  uint32_t padLeft = 1,
378  uint32_t padTop = 1,
379  uint32_t padRight = 1,
380  uint32_t padBottom = 1,
381  uint32_t strideX = 1,
382  uint32_t strideY = 1)
383 {
384  armnn::IgnoreUnused(qScale, qOffset);
385  unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
386  unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
387  unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
388  unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
389 
390  unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
391  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
392  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
393  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
394 
395  unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
396  unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
397  unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
398  unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
399 
400  bool biasEnabled = bias.size() > 0;
401 
402  // Creates the tensors.
403  armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
404  armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
405  ArmnnType);
406  armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
407  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
408 
409  // Construct the input data.
410  std::vector<T> inputData;
411  inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
412  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
413 
414  // Construct the output data, with bias applied, as appropriate.
415  std::vector<T> outputData;
416  outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
417 
418  LayerTestResult<T, 4> ret(outputTensorInfo);
419  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
420 
421  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
422  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
423 
424  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
425  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
426 
427  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
428 
429  armnn::Convolution2dQueueDescriptor data;
430 
431  data.m_Weight = &weightsTensor;
432  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
433  data.m_Parameters.m_StrideX = strideX;
434  data.m_Parameters.m_StrideY = strideY;
435  data.m_Parameters.m_PadLeft = padLeft;
436  data.m_Parameters.m_PadRight = padRight;
437  data.m_Parameters.m_PadTop = padTop;
438  data.m_Parameters.m_PadBottom = padBottom;
439  data.m_Parameters.m_BiasEnabled = biasEnabled;
440  data.m_Parameters.m_DataLayout = dataLayout;
441 
442  armnn::WorkloadInfo info;
443  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
444  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
445 
446  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
447  inputHandle->Allocate();
448  outputHandle->Allocate();
449 
450  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
451 
452  ExecuteWorkload(*workload, memoryManager);
453 
454  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
455 
456  return ret;
457 }
458 
459 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
460 LayerTestResult<T, 4> Convolution1dTestImpl(
461  armnn::IWorkloadFactory& workloadFactory,
462  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
463  float qScale,
464  int32_t qOffset,
465  bool biasEnabled)
466 {
467  using B = armnn::ResolveType<ArmnnBType>;
468  // Until we have a specialist 1D convolution layer, we can fake one using
469  // 2D convolution with the final dimension set to 1.
470  // I don't anticipate this being particularly slow, given that convolution is implemented
471  // as a matrix multiplication, at which point dimension doesn't matter.
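 // The 1D tensors are therefore given 4D shapes of [N, C, size, 1] below, and the stride and
 // padding are applied along the 'height' dimension only.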
472 
473  unsigned int batchSize = 1;
474  unsigned int inputChannels = 2;
475  unsigned int outputChannels = 3;
476  unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
477  unsigned int kernelSize = 3;
478  unsigned int padSize = 2;
479  unsigned int stride = 1;
480  unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.
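 //   With inputSize = 5, padSize = 2, kernelSize = 3 and stride = 1: (5 + 4 - 3 + 1) / 1 = 7.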
481 
482  armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
483  armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
484  armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
485  armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
486 
487  // Set quantization parameters if the requested type is a quantized type.
488  if(armnn::IsQuantizedType<T>())
489  {
490  inputInfo.SetQuantizationScale(qScale);
491  inputInfo.SetQuantizationOffset(qOffset);
492  outputInfo.SetQuantizationScale(qScale);
493  outputInfo.SetQuantizationOffset(qOffset);
494  kernelInfo.SetQuantizationScale(qScale);
495  kernelInfo.SetQuantizationOffset(qOffset);
496  biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
497  biasInfo.SetQuantizationOffset(0);
498  }
499 
500  std::vector<T> inputData = QuantizedVector<T>(
501  {
502  5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
503  -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
504  },
505  inputInfo.GetQuantizationScale(),
506  inputInfo.GetQuantizationOffset());
507 
508  std::vector<T> kernelData = QuantizedVector<T>(
509  {
510  1.0f, 0.0f, 0.0f,
511  0.0f, 2.0f, -1.5f,
512 
513  0.0f, 0.0f, 0.0f,
514  0.2f, 0.2f, 0.2f,
515 
516  0.5f, 0.0f, 0.5f,
517  0.0f, -1.0f, 0.0f
518  },
519  kernelInfo.GetQuantizationScale(),
520  kernelInfo.GetQuantizationOffset());
521 
522  std::vector<B> biasData =
523  QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());
524 
525  std::vector<T> outputData = QuantizedVector<T>(
526  {
527  4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
528  -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
529  2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
530  },
531  outputInfo.GetQuantizationScale(),
532  outputInfo.GetQuantizationOffset());
533 
534  // Optionally apply bias to output image.
535  if(biasEnabled)
536  {
537  ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
538  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
539  1, outputSize);
540  }
541 
542  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
543  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
544 
545  armnn::Convolution2dQueueDescriptor data;
546  armnn::WorkloadInfo info;
547  armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
548  armnn::ScopedCpuTensorHandle biasTensor(biasInfo);
549 
550  AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
551  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
552 
553  AddInputToWorkload(data, info, inputInfo, inputHandle.get());
554  AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
555 
556  data.m_Weight = &weightsTensor;
557  data.m_Bias = &biasTensor;
558  data.m_Parameters.m_StrideX = 1;
559  data.m_Parameters.m_StrideY = stride;
560  data.m_Parameters.m_PadLeft = 0;
561  data.m_Parameters.m_PadRight = 0;
562  data.m_Parameters.m_PadTop = padSize;
563  data.m_Parameters.m_PadBottom = padSize;
564  data.m_Parameters.m_BiasEnabled = biasEnabled;
565 
566  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
567  inputHandle->Allocate();
568  outputHandle->Allocate();
569 
570  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
571 
572  ExecuteWorkload(*workload, memoryManager);
573 
574  // Output
575  LayerTestResult<T,4> ret(outputInfo);
576  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
577  ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
578  return ret;
579 }
580 
581 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
582 LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
583  armnn::IWorkloadFactory& workloadFactory,
584  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
585  float qScale,
586  int32_t qOffset,
587  bool biasEnabled,
588  armnn::DataLayout dataLayout)
589 {
590  armnn::IgnoreUnused(biasEnabled);
591  // Use a single-batch 1-channel input image of height 3 and width 4 (NHWC).
592 
593  armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
594  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
595  {
596  1, 5, 2, 3,
597  8, 7, 3, 6,
598  3, 3, 9, 1
599  });
600 
601 
602  // Use a single 1-channel 3x3 kernel.
603  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
604  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
605  4, 5, 6,
606  0, 0, 0,
607  3, 2, 1
608  });
609 
610  // Expected output is a single-batch 1-channel image of height 3 and width 4.
611  armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
612 
613  const std::vector<float> outputData =
614  {
615  23, 41, 33, 21,
616  44, 65, 76, 52,
617  82, 85, 79, 42
618  };
619 
620  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
621 
622  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
623  workloadFactory,
624  memoryManager,
625  input,
626  kernel,
627  boost::multi_array<T, 1>(),
628  expectedOutput,
629  dataLayout,
630  qScale,
631  qOffset);
632 }
633 
634 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
635 LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
636  armnn::IWorkloadFactory& workloadFactory,
637  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
638  float qScale,
639  int32_t qOffset,
640  bool biasEnabled,
641  const armnn::DataLayout& dataLayout)
642 {
643  armnn::IgnoreUnused(biasEnabled);
644 
645  // Input is a single-batch, 1 channel, 5x5 image.
646  armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
647  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
648  {
649  1, 5, 2, 3, 5,
650  8, 7, 3, 6, 3,
651  3, 3, 9, 1, 9,
652  4, 1, 8, 1, 3,
653  6, 8, 1, 9, 2
654  });
655 
656  // Use a 3x3 kernel.
657  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
658  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
659  {
660  4, 5, 6,
661  0, 0, 0,
662  3, 2, 1
663  });
664 
665  // Expected output is a single-batch, 1 channel, 3x3 image.
666  armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
667 
668  const std::vector<T> outputData =
669  {
670  23, 33, 24,
671  91, 99, 48,
672  26, 50, 19
673  };
674 
675  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
676 
677  uint32_t padLeft = 1;
678  uint32_t padTop = 1;
679  uint32_t padRight = 1;
680  uint32_t padBottom = 1;
681  uint32_t strideX = 2;
682  uint32_t strideY = 2;
683 
684  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
685  workloadFactory,
686  memoryManager,
687  input,
688  kernel,
689  boost::multi_array<T, 1>(),
690  expectedOutput,
691  dataLayout,
692  qScale,
693  qOffset,
694  padLeft,
695  padTop,
696  padRight,
697  padBottom,
698  strideX,
699  strideY);
700 }
701 
702 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
703 LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
704  armnn::IWorkloadFactory& workloadFactory,
705  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
706  float qScale,
707  int32_t qOffset,
708  bool biasEnabled,
709  const armnn::DataLayout layout)
710 {
711  // Use common single-batch 3-channel 16x8 image.
712  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
713  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
714 
715  // Use a 2-element batch with 3-channel 3x5 kernels.
716  armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
717  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
718  QuantizedVector<T>({
719  1, 1, 1,
720  1, -1, 1,
721  1, 1, 1,
722  1, 1, 1,
723  1, 1, 1,
724 
725  0, 0, 0,
726  0, 0, 0,
727  0, 0, 0,
728  0, 0, 0,
729  0, 0, 0,
730 
731  2, 2, 2,
732  2, 2, 2,
733  2, 2, 2,
734  2, 2, 2,
735  2, 2, 2,
736 
737 
738  0, 0, 0,
739  0, 0, 0,
740  0, 0, 0,
741  0, 0, 0,
742  0, 0, 0,
743 
744  1, 1, 1,
745  1, 1, 1,
746  1, 1, 1,
747  1, 1, 1,
748  1, 1, 1,
749 
750  0, 0, 0,
751  0, 0, 0,
752  0, 0, 0,
753  0, 0, 0,
754  0, 0, 0
755  },
756  qScale, qOffset)));
757 
758  // Expected output is 1 batch of a 2-channel 14x4 image.
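 // Output size with a 3x5 (WxH) kernel, no padding and stride 1: width = 16 - 3 + 1 = 14, height = 8 - 5 + 1 = 4.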
759  armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
760  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
761  QuantizedVector<T>({
762  -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
763  -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
764  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
765  -23.5f, -23.5f, -23.5f,
766  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
767  -23.5f, -23.5f, -23.5f,
768 
769  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
770  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
771  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
772  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
773  },
774  qScale, qOffset)));
775 
776  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
777  workloadFactory,
778  memoryManager,
779  input,
780  kernel,
781  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
782  expectedOutput,
783  qScale,
784  qOffset,
785  layout);
786 }
787 
788 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
789  typename T = armnn::ResolveType<ArmnnType>>
790 LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
791  armnn::IWorkloadFactory& workloadFactory,
792  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
793  float qScale,
794  int32_t qOffset,
795  bool biasEnabled,
796  const armnn::DataLayout layout)
797 {
798  // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
799 
800  // Use common single-batch 3-channel 16x8 image.
801  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
802  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
803 
804  // Use a 2-element batch of 3-channel 3x3 kernels.
805  armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
806  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
807  QuantizedVector<T>({
808  1, 1, 1,
809  1, -1, 1,
810  1, 1, 1,
811 
812  0, 0, 0,
813  0, 0, 0,
814  0, 0, 0,
815 
816  2, 2, 2,
817  2, 2, 2,
818  2, 2, 2,
819 
820 
821  0, 0, 0,
822  0, 0, 0,
823  0, 0, 0,
824 
825  1, 1, 1,
826  1, 1, 1,
827  1, 1, 1,
828 
829  0, 0, 0,
830  0, 0, 0,
831  0, 0, 0
832  },
833  qScale, qOffset)));
834 
835  // Expected output is 1 batch of a 2-channel 14x6 image.
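 // Output size with a 3x3 kernel, no padding and stride 1: width = 16 - 3 + 1 = 14, height = 8 - 3 + 1 = 6.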
836  armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
837  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
838  QuantizedVector<T>({
839  -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
840  -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
841  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
842  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
843  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
844  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
845 
846  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
847  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
848  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
849  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
850  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
851  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
852  },
853  qScale, qOffset)));
854 
855  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
856  workloadFactory,
857  memoryManager,
858  input,
859  kernel,
860  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
861  expectedOutput,
862  qScale,
863  qOffset,
864  layout);
865 }
866 
867 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
868  typename T = armnn::ResolveType<ArmnnType>>
869 LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
870  armnn::IWorkloadFactory& workloadFactory,
871  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
872  const armnn::DataLayout layout,
873  float qScale,
874  int32_t qOffset)
875 {
876  // Use a single-batch 1-channel 3x3 image as input.
877  armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
878  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
879  QuantizedVector<T>({
880  11,21,31,
881  12,22,32,
882  13,23,33
883  },
884  qScale, qOffset)));
885 
886  // Use 1 batch of a 1-channel 2x2 kernel.
887  armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
888  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
889  QuantizedVector<T>({
890  -11,-21,
891  -12,-22,
892  },
893  qScale, qOffset)));
894 
895 // Expected output is 1 batch of a 1-channel 6x8 image.
896 // Manually calculated like this:
897 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
898 //[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
899 //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
900 //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
901 //[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
902 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
903 //[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
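 // Output size with a 2x2 kernel, stride 1 and (left, top, right, bottom) padding of (1, 2, 3, 4):
 // width = 3 + 1 + 3 - 2 + 1 = 6, height = 3 + 2 + 4 - 2 + 1 = 8.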
904  armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
905  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
906  QuantizedVector<T>({
907  0, 0, 0, 0, 0, 0,
908  -242, -594, -934, -372, 0, 0,
909  -495, -1190, -1850, -725, 0, 0,
910  -538, -1256, -1916, -748, 0, 0,
911  -273, -626, -946, -363, 0, 0,
912  0, 0, 0, 0, 0, 0,
913  0, 0, 0, 0, 0, 0,
914  0, 0, 0, 0, 0, 0
915  },
916  qScale, qOffset)));
917 
918  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
919  workloadFactory,
920  memoryManager,
921  input,
922  kernel,
923  GetBias2<ArmnnBType>(false, qScale * qScale),
924  expectedOutput,
925  qScale,
926  qOffset,
927  layout,
928  1, // Padding left.
929  2, // Padding top.
930  3, // Padding right.
931  4); // Padding bottom.
932 }
933 
934 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
935  typename T = armnn::ResolveType<ArmnnType>>
936 LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
937  armnn::IWorkloadFactory& workloadFactory,
938  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
939  const armnn::DataLayout layout,
940  float qScale,
941  int32_t qOffset)
942 {
943  // Use a single-batch 1-channel 5x5 image as input.
944  armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
945  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
946  QuantizedVector<T>({
947  11,21,31,41,51,
948  12,22,32,42,52,
949  13,23,33,43,53,
950  14,24,34,44,54,
951  15,25,35,45,55,
952  }, qScale, qOffset)));
953 
954  // Use 1 batch of a 1-channel 4x4 kernel.
955  armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
956  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
957  QuantizedVector<T>({
958  -11,-21,-31,-41,
959  -12,-22,-32,-42,
960  -13,-23,-33,-43,
961  -14,-24,-34,-44,
962  },
963  qScale, qOffset)));
964 
965  // Expected output is 1 batch of a 1-channel 5x5 image.
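 // Output size with a 4x4 kernel, stride 1 and (left, top, right, bottom) padding of (1, 1, 2, 2):
 // width = 5 + 1 + 2 - 4 + 1 = 5, height = 5 + 1 + 2 - 4 + 1 = 5.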
966  armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
967  std::vector<T> myVec(outputDesc.GetNumElements(), 0);
968  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
969  QuantizedVector<T>({
970  -7140, -10580, -13940, -9300, -5230,
971  -9590, -14120, -18520, -12290, -6860,
972  -9980, -14560, -18960, -12560, -7000,
973  -7518, -10904, -14144, -9318, -5152,
974  -5032, -7256, -9376, -6142, -3368,
975  },
976  qScale, qOffset)));
977 
978  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
979  workloadFactory,
980  memoryManager,
981  input,
982  kernel,
983  GetBias2<ArmnnBType>(false, qScale * qScale),
984  expectedOutput,
985  qScale,
986  qOffset,
987  layout,
988  1, // Padding left.
989  1, // Padding top.
990  2, // Padding right.
991  2); // Padding bottom.
992 }
993 
994 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
995 LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
996  armnn::IWorkloadFactory& workloadFactory,
997  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
998  const std::vector<float>& inputNoQuantizedValues,
999  armnn::TensorInfo& inputTensorInfo,
1000  const std::vector<float>& kernelNoQuantizedValues,
1001  armnn::TensorInfo& kernelTensorInfo,
1002  const std::vector<float>& outputExpectedNoQuantizedValues,
1003  armnn::TensorInfo& outputTensorInfo,
1004  uint32_t dilationX,
1005  uint32_t dilationY,
1006  const armnn::DataLayout layout,
1007  uint32_t padLeft = 0,
1008  uint32_t padTop = 0,
1009  uint32_t padRight = 0,
1010  uint32_t padBottom = 0,
1011  uint32_t strideX = 1,
1012  uint32_t strideY = 1,
1013  bool biasEnabled = false
1014 )
1015 {
1016  float qScale;
1017  int32_t qOffset;
1018  switch (ArmnnType)
1019  {
1020  case armnn::DataType::QAsymmU8:
1021  {
1022  qScale = 0.1f;
1023  qOffset = 128;
1024  break;
1025  }
1026  case armnn::DataType::QSymmS16:
1027  {
1028  qScale = 0.1f;
1029  qOffset = 0;
1030  break;
1031  }
1033  default:
1034  {
1035  qScale = 0.f;
1036  qOffset = 0;
1037  break;
1038  }
1039  }
1040 
1041  inputTensorInfo.SetQuantizationScale(qScale);
1042  inputTensorInfo.SetQuantizationOffset(qOffset);
1043  kernelTensorInfo.SetQuantizationScale(qScale);
1044  kernelTensorInfo.SetQuantizationOffset(qOffset);
1045  outputTensorInfo.SetQuantizationScale(qScale);
1046  outputTensorInfo.SetQuantizationOffset(qOffset);
1047 
1048  auto input = MakeTensor<T, 4>(inputTensorInfo,
1049  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
1050  inputTensorInfo.GetQuantizationScale(),
1051  inputTensorInfo.GetQuantizationOffset())));
1052  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
1053  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
1054  kernelTensorInfo.GetQuantizationScale(),
1055  kernelTensorInfo.GetQuantizationOffset())));
1056  auto expectedOutput =
1057  MakeTensor<T, 4>(outputTensorInfo,
1058  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
1059  outputTensorInfo.GetQuantizationScale(),
1060  outputTensorInfo.GetQuantizationOffset())));
1061 
1062  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
1063  workloadFactory,
1064  memoryManager,
1065  input,
1066  kernel,
1067  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
1068  expectedOutput,
1069  qScale,
1070  qOffset,
1071  layout,
1072  padLeft,
1073  padTop,
1074  padRight,
1075  padBottom,
1076  strideX,
1077  strideY,
1078  dilationX,
1079  dilationY);
1080 }
1081 
1082 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1083 LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
1084  armnn::IWorkloadFactory& workloadFactory,
1085  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1086  bool biasEnabled,
1087  const armnn::DataLayout layout)
1088 {
1089  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1090  std::vector<float> inputNoQuantizedValues =
1091  {
1092  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1093  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1094  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1095  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1096  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1097  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1098  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1099  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1100  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1101  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1102  };
1103 
1104  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
1105  std::vector<float> kernelNoQuantizedValues =
1106  {
1107  1, 2, 3,
1108  4, 5, 6,
1109  7, 8, 9
1110  };
1111 
1112  // Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
1113  // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
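 // (Effective kernel size with dilation d: d*(K-1) + 1 = 3*(3-1) + 1 = 7.)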
1114  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1115  std::vector<float> outputExpectedNoQuantizedValues =
1116  {
1117  6., 5., 5., 5.,
1118  6., 5., 5., 5.,
1119  6., 5., 5., 5.,
1120  3., 2., 2., 2.
1121  };
1122 
1123  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1124  workloadFactory,
1125  memoryManager,
1126  inputNoQuantizedValues,
1127  inputTensorInfo,
1128  kernelNoQuantizedValues,
1129  kernelTensorInfo,
1130  outputExpectedNoQuantizedValues,
1131  outputTensorInfo,
1132  3,
1133  3,
1134  layout,
1135  biasEnabled);
1136 }
1137 
1138 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1139 LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
1140  armnn::IWorkloadFactory& workloadFactory,
1141  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1142  bool biasEnabled,
1143  const armnn::DataLayout layout)
1144 {
1145  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
1146  std::vector<float> inputNoQuantizedValues =
1147  {
1148  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1149  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1150  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1151  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1152  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1153  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1154  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1155  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1156  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1157  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1158 
1159  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1160  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1161  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1162  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1163  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1164  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1165  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1166  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1167  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1168  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1169  };
1170 
1171  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
1172  std::vector<float> kernelNoQuantizedValues =
1173  {
1174  1, 2, 3,
1175  4, 5, 6,
1176  7, 8, 9,
1177 
1178  1, 2, 3,
1179  4, 5, 6,
1180  7, 8, 9
1181  };
1182 
1183  // Since the dilation rate is 3 this will dilate the kernel to be like 7x7,
1184  // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1
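 // With two identical input channels the per-channel results are summed, so the expected values
 // are exactly double those of the single-channel test above.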
1185  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1186  std::vector<float> outputExpectedNoQuantizedValues =
1187  {
1188  12., 10., 10., 10.,
1189  12., 10., 10., 10.,
1190  12., 10., 10., 10.,
1191  6., 4., 4., 4.
1192  };
1193 
1194  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1195  workloadFactory,
1196  memoryManager,
1197  inputNoQuantizedValues,
1198  inputTensorInfo,
1199  kernelNoQuantizedValues,
1200  kernelTensorInfo,
1201  outputExpectedNoQuantizedValues,
1202  outputTensorInfo,
1203  3,
1204  3,
1205  layout,
1206  biasEnabled);
1207 }
1208 
1209 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1210 LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
1211  armnn::IWorkloadFactory &workloadFactory,
1212  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1213  bool biasEnabled,
1214  const armnn::DataLayout layout)
1215 {
1216  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1217  std::vector<float> inputNoQuantizedValues =
1218  {
1219  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1220  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1221  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1222  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1223  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1224  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1225  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1226  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1227  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1228  1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1229  };
1230 
1231  armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
1232  std::vector<float> kernelNoQuantizedValues =
1233  {
1234  1, 2,
1235  3, 4
1236  };
1237 
1238  // Since the dilation rate is 2 this will dilate the 2x2 kernel to be 3x3: d*(K-1) + 1 --> 2*(2-1) + 1 = 3.
1239  // With total padding of 2 per dimension (1 on each side) and stride 3 the output will be 4x4:
1240  // floor((I - K_dilated + P)/S) + 1 => floor((10 - 3 + 2)/3) + 1 = 4, where I = 10, P = 2 and S = 3.
1241  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1242  std::vector<float> outputExpectedNoQuantizedValues =
1243  {
1244  4, 7, 7, 3,
1245  6, 10, 10, 4,
1246  6, 10, 10, 4,
1247  2, 3, 3, 1
1248  };
1249  uint32_t padLeft = 1;
1250  uint32_t padTop = 1;
1251  uint32_t padRight = 1;
1252  uint32_t padBottom = 1;
1253 
1254  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1255  workloadFactory,
1256  memoryManager,
1257  inputNoQuantizedValues,
1258  inputTensorInfo,
1259  kernelNoQuantizedValues,
1260  kernelTensorInfo,
1261  outputExpectedNoQuantizedValues,
1262  outputTensorInfo,
1263  2,
1264  2,
1265  layout,
1266  padLeft,
1267  padTop,
1268  padRight,
1269  padBottom,
1270  3,
1271  3,
1272  biasEnabled
1273  );
1274 }
1275 
1276 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
1277 LayerTestResult<T, 4> CompareConvolution2dTestImpl(
1278  armnn::IWorkloadFactory& workloadFactory,
1279  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1280  armnn::IWorkloadFactory& refWorkloadFactory)
1281 {
1282  unsigned int inputHeight = 8;
1283  unsigned int inputWidth = 16;
1284  unsigned int inputChannels = 3;
1285  unsigned int inputNum = 5;
1286 
1287  unsigned int kernelHeight = 3;
1288  unsigned int kernelWidth = 3;
1289 
1290  unsigned int strideX = 2;
1291  unsigned int strideY = 3;
1292  unsigned int padX = 1;
1293  unsigned int padY = 1;
1294 
1295  unsigned int outputNum = inputNum;
1296  unsigned int outputChannels = 2;
1297  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
1298  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
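 // With these values: outputHeight = (8 + 2 - 3 + 3) / 3 = 3 and outputWidth = (16 + 2 - 3 + 2) / 2 = 8.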
1299 
1300  armnn::TensorInfo inputTensorInfo;
1301  armnn::TensorInfo outputTensorInfo;
1302  armnn::TensorInfo kernelDesc;
1303  armnn::TensorInfo biasDesc;
1304 
1305  unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
1306  unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1307  unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1308  unsigned int biasShape[] = {outputChannels};
1309 
1310  inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1311  outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1312  kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1313  biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1314 
1315  LayerTestResult<T,4> ret(outputTensorInfo);
1316 
1317  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
1318  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
1319  auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);
1320 
1321  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1322  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1323 
1324  armnn::Convolution2dQueueDescriptor data;
1325  armnn::WorkloadInfo info;
1326  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1327  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1328 
1329  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1330  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1331 
1332  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1333  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1334  data.m_Weight = &weightsTensor;
1335  data.m_Bias = &biasTensor;
1336  data.m_Parameters.m_StrideX = strideX;
1337  data.m_Parameters.m_StrideY = strideY;
1338  data.m_Parameters.m_PadLeft = padX;
1339  data.m_Parameters.m_PadRight = padX;
1340  data.m_Parameters.m_PadTop = padY;
1341  data.m_Parameters.m_PadBottom = padY;
1342  data.m_Parameters.m_BiasEnabled = true;
1343 
1344  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
1345  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
1346 
1347  armnn::Convolution2dQueueDescriptor refData = data;
1348  armnn::WorkloadInfo refInfo = info;
1349  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1350  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1351 
1352  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
1353  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
1354 
1355  outputHandleRef->Allocate();
1356  inputHandleRef->Allocate();
1357 
1358  inputHandle->Allocate();
1359  outputHandle->Allocate();
1360 
1361  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1362  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1363 
1364  ExecuteWorkload(*workload, memoryManager);
1365 
1366  workloadRef->PostAllocationConfigure();
1367  workloadRef->Execute();
1368 
1369  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1370  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1371 
1372  return ret;
1373 }
1374 
1375 //
1376 // DepthwiseConvolution2d implementations
1377 //
1378 
1379 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1380          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1381 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1382  armnn::IWorkloadFactory& workloadFactory,
1383  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1384  const boost::multi_array<T, 4>& input,
1385  const boost::multi_array<T, 4>& kernel,
1386  const boost::multi_array<B, 1>& bias,
1387  const boost::multi_array<T, 4>& outputExpected,
1388  float qScale,
1389  int32_t qOffset,
1390  const armnn::DataLayout layout,
1391  uint32_t padLeft = 0,
1392  uint32_t padTop = 0,
1393  uint32_t padRight = 0,
1394  uint32_t padBottom = 0,
1395  uint32_t strideX = 1,
1396  uint32_t strideY = 1)
1397 {
1398  unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
1399  unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
1400  unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
1401  unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
1402  unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
1403  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
1404  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
1405  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
1406  unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1407  unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1408  unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1409  unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1410 
1411  // If a bias is used, its size must equal the number of output channels.
1412  bool biasEnabled = bias.size() > 0;
1413  BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1414 
1415  // Creates the tensors.
1416  armnn::TensorInfo inputTensorInfo =
1417  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1418  armnn::TensorInfo outputTensorInfo =
1419  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1420  armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1421  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1422 
1423  // Set quantization parameters if the requested type is a quantized type.
1424  if (armnn::IsQuantizedType<T>())
1425  {
1426  inputTensorInfo.SetQuantizationScale(qScale);
1427  inputTensorInfo.SetQuantizationOffset(qOffset);
1428  outputTensorInfo.SetQuantizationScale(qScale);
1429  outputTensorInfo.SetQuantizationOffset(qOffset);
1430  kernelDesc.SetQuantizationScale(qScale);
1431  kernelDesc.SetQuantizationOffset(qOffset);
1432  biasDesc.SetQuantizationScale(qScale*qScale);
1433  biasDesc.SetQuantizationOffset(0);
1434  }
1435 
1436  // Construct the input data.
1437  std::vector<T> inputData;
1438  inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1439 
1440  // At this point, permute the input data into NHWC if that layout is required.
1441  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1442  if (layout == armnn::DataLayout::NHWC)
1443  {
1444  std::vector<T> tmp(inputData.size());
1445  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1446  inputData = tmp;
1447  }
1448 
1449  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1450 
1451  // Construct the output data, with bias applied, as appropriate.
1452  std::vector<T> outputData;
1453  outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1454  if (biasEnabled)
1455  {
1456  std::vector<T> biasV;
1457  biasV.assign(bias.data(), bias.data() + outputChannels);
1458  ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1459  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1460  outputWidth, outputHeight);
1461  }
1462 
1463  LayerTestResult<T, 4> ret(outputTensorInfo);
1464 
1465  // At this point, permute the expected output into NHWC if that layout is required.
1466  if (layout == armnn::DataLayout::NHWC)
1467  {
1468  std::vector<T> tmp(outputData.size());
1469  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1470  outputData = tmp;
1471  }
1472 
1473  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1474 
1475  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1476  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1477 
1478  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1479 
1480  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1481 
1482  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1483  if (biasEnabled)
1484  {
1485  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1486  }
1487 
1488  armnn::DepthwiseConvolution2dQueueDescriptor data;
1489  data.m_Weight = &weightsTensor;
1490  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
1491  data.m_Parameters.m_StrideX = strideX;
1492  data.m_Parameters.m_StrideY = strideY;
1493  data.m_Parameters.m_PadLeft = padLeft;
1494  data.m_Parameters.m_PadRight = padRight;
1495  data.m_Parameters.m_PadTop = padTop;
1496  data.m_Parameters.m_PadBottom = padBottom;
1497  data.m_Parameters.m_BiasEnabled = biasEnabled;
1498  data.m_Parameters.m_DataLayout = layout;
1499 
1500  armnn::WorkloadInfo info;
1501  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1502  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1503 
1504  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1505  inputHandle->Allocate();
1506  outputHandle->Allocate();
1507 
1508  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1509 
1510  ExecuteWorkload(*workload, memoryManager);
1511 
1512  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1513 
1514  return ret;
1515 }
1516 
1517 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1518 LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1519  armnn::IWorkloadFactory& workloadFactory,
1520  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1521  float qScale,
1522  int32_t qOffset,
1523  bool biasEnabled,
1524  const armnn::DataLayout layout)
1525 {
1526  using B = armnn::ResolveType<ArmnnBType>;
1527 
1528  unsigned int inputHeight = 3;
1529  unsigned int inputWidth = 3;
1530  unsigned int inputChannels = 2;
1531  unsigned int inputNum = 1;
1532 
1533  unsigned int kernelHeight = 3;
1534  unsigned int kernelWidth = 3;
1535  unsigned int kernelChannels = inputChannels;
1536  unsigned int kernelDepthMultiplier = 1;
1537 
1538  unsigned int outputHeight = 1;
1539  unsigned int outputWidth = 1;
1540  unsigned int outputChannels = kernelChannels;
1541  unsigned int outputNum = inputNum;
1542 
1543  armnn::TensorInfo inputTensorInfo =
1544  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1545  armnn::TensorInfo outputTensorInfo =
1546  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1547  armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1548  ArmnnType);
1549  armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1550 
1551  // Set quantization parameters if the requested type is a quantized type.
1552  if(armnn::IsQuantizedType<T>())
1553  {
1554  inputTensorInfo.SetQuantizationScale(qScale);
1555  inputTensorInfo.SetQuantizationOffset(qOffset);
1556  outputTensorInfo.SetQuantizationScale(qScale);
1557  outputTensorInfo.SetQuantizationOffset(qOffset);
1558  kernelDesc.SetQuantizationScale(qScale);
1559  kernelDesc.SetQuantizationOffset(qOffset);
1560  biasDesc.SetQuantizationScale(qScale*qScale);
1561  biasDesc.SetQuantizationOffset(0);
1562  }
1563  std::vector<T> inputData = std::vector<T>(
1564  QuantizedVector<T>({
1565  1.f, 2.f, 1.f,
1566  2.f, 1.f, 2.f,
1567  1.f, 2.f, 1.f,
1568 
1569  1.f, 2.f, 1.f,
1570  2.f, 1.f, 2.f,
1571  1.f, 2.f, 1.f,
1572  },
1573  inputTensorInfo.GetQuantizationScale(),
1574  inputTensorInfo.GetQuantizationOffset()));
1575 
1576  // At this point, permute the input data into NHWC if that layout is required.
1577  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1578  if (layout == armnn::DataLayout::NHWC)
1579  {
1580  std::vector<T> tmp(inputData.size());
1581  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1582  inputData = tmp;
1583  }
1584  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1585 
1586  std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1587  biasDesc.GetQuantizationScale(),
1588  biasDesc.GetQuantizationOffset()));
1589 
1590  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1591 
1592  std::vector<T> kernelData = std::vector<T>(
1593  QuantizedVector<T>({
1594  1.f, 0.f, 1.f,
1595  0.f, 0.f, 0.f,
1596  -1.f, 0.f, -1.f,
1597 
1598  1.f, 0.f, 1.f,
1599  0.f, 0.f, 0.f,
1600  -1.f, 0.f, -1.f,
1601  },
1602  kernelDesc.GetQuantizationScale(),
1603  kernelDesc.GetQuantizationOffset()));
1604 
1605  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1606 
1607  // Manually calculated.
1608  std::vector<T> outputImage(
1609  QuantizedVector<T>({ 0.f, 0.f },
1610  outputTensorInfo.GetQuantizationScale(),
1611  outputTensorInfo.GetQuantizationOffset())
1612  );
1613 
1614  // Optionally apply bias to output image.
1615  if(biasEnabled)
1616  {
1617  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1618  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1619  outputWidth, outputHeight);
1620  }
1621 
1622  LayerTestResult<T, 4> ret(outputTensorInfo);
1623  if (layout == armnn::DataLayout::NHWC)
1624  {
1625  std::vector<T> tmp(outputImage.size());
1626  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1627  outputImage = tmp;
1628  }
1629 
1630  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1631 
1632  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1633  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1634 
1635  armnn::DepthwiseConvolution2dQueueDescriptor data;
1636  armnn::WorkloadInfo info;
1637  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1638  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1639 
1640  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1641  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1642 
1643  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1644  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1645 
1646  data.m_Weight = &weightsTensor;
1647  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1648  data.m_Parameters.m_StrideX = 1;
1649  data.m_Parameters.m_StrideY = 1;
1650  data.m_Parameters.m_PadLeft = 0;
1651  data.m_Parameters.m_PadRight = 0;
1652  data.m_Parameters.m_PadTop = 0;
1653  data.m_Parameters.m_PadBottom = 0;
1654  data.m_Parameters.m_BiasEnabled = biasEnabled;
1655  data.m_Parameters.m_DataLayout = layout;
1656 
1657  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1658  inputHandle->Allocate();
1659  outputHandle->Allocate();
1660 
1661  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1662 
1663  ExecuteWorkload(*workload, memoryManager);
1664 
1665  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1666 
1667  return ret;
1668 }
1669 
1670 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1671 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1672  armnn::IWorkloadFactory& workloadFactory,
1673  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1674  float qScale,
1675  int32_t qOffset,
1676  bool biasEnabled,
1677  const armnn::DataLayout layout)
1678 {
1679  using B = armnn::ResolveType<ArmnnBType>;
1680 
1681  unsigned int depthMultiplier = 2;
1682 
1683  unsigned int inputHeight = 8;
1684  unsigned int inputWidth = 16;
1685  unsigned int inputChannels = 2;
1686  unsigned int inputBatchSize = 1;
1687 
1688  unsigned int kernelHeight = 5;
1689  unsigned int kernelWidth = 3;
1690 
1691  unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1692  unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
1693  unsigned int outputChannels = inputChannels * depthMultiplier;
1694  unsigned int outputBatchSize = inputBatchSize;
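     // For reference, the output dimensions above work out as follows: with strideY = 1 and
     // padTop = padBottom = 1 (set on the descriptor below), outputHeight = (8 + 2 - 5) / 1 + 1 = 6;
     // with strideX = 2 and no horizontal padding, outputWidth = (16 - 3) / 2 + 1 = 7.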
1695 
1696  armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1697  inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1698  armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1699  outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1700  armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1701  ArmnnType);
1702  armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1703 
1704  // Set quantization parameters if the requested type is a quantized type.
1705  if(armnn::IsQuantizedType<T>())
1706  {
1707  inputTensorInfo.SetQuantizationScale(qScale);
1708  inputTensorInfo.SetQuantizationOffset(qOffset);
1709  outputTensorInfo.SetQuantizationScale(qScale);
1710  outputTensorInfo.SetQuantizationOffset(qOffset);
1711  kernelDesc.SetQuantizationScale(qScale);
1712  kernelDesc.SetQuantizationOffset(qOffset);
1713  biasDesc.SetQuantizationScale(qScale*qScale);
1714  biasDesc.SetQuantizationOffset(0);
1715  }
1716 
1717  // NOTE: originalInputData is in NCHW format
1718  std::vector<T> originalInputData = std::vector<T>(
1719  QuantizedVector<T>({
1720  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1721  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1722  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1723  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1724  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1725  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1726  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1727  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1728  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1729  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1730  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1731  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1732  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1733  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1734  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1735  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1736  },
1737  inputTensorInfo.GetQuantizationScale(),
1738  inputTensorInfo.GetQuantizationOffset()));
1739 
1740  std::vector<T> inputData = originalInputData;
1741  // If the requested layout is NHWC, permute the input data at this point.
1742  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1743  if (layout == armnn::DataLayout::NHWC)
1744  {
1745  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1746  originalInputData.data(), inputData.data(), sizeof(T));
1747  }
1748  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1749 
1750  std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
1751  biasDesc.GetQuantizationScale(),
1752  biasDesc.GetQuantizationOffset());
1753 
1754  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1755 
1756  std::vector<T> kernelData = std::vector<T>(
1757  QuantizedVector<T>({
1758  1, 1, 1,
1759  1, -1, 1,
1760  1, 1, 1,
1761  1, 1, 1,
1762  1, 1, 1,
1763 
1764  2, 2, 2,
1765  2, 2, 2,
1766  2, 2, 2,
1767  2, 2, 2,
1768  2, 2, 2,
1769 
1770  0, 0, 0,
1771  0, -1, 0,
1772  0, 0, 0,
1773  0, 0, 0,
1774  0, 0, 0,
1775 
1776  0, 0, 0,
1777  0, 0, 0,
1778  0, 1, 0,
1779  0, 0, 0,
1780  0, 0, 0
1781  },
1782  kernelDesc.GetQuantizationScale(),
1783  kernelDesc.GetQuantizationOffset()));
1784 
1785  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1786 
1787  // Manually calculated.
1788  std::vector<T> originalOutputImage = std::vector<T>(
1789  QuantizedVector<T>({
1790  3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
1791  6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
1792  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1793  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1794  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
1795  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
1796 
1797  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1798  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1799  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1800  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1801  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1802  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
1803 
1804  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1805  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1806  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1807  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1808  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1809  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1810 
1811  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1812  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1813  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1814  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1815  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1816  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1817  },
1818  outputTensorInfo.GetQuantizationScale(),
1819  outputTensorInfo.GetQuantizationOffset()));
1820 
1821  // Optionally apply bias to output image.
1822  if(biasEnabled)
1823  {
1824  ApplyBias(originalOutputImage,
1825  outputTensorInfo.GetQuantizationScale(),
1826  outputTensorInfo.GetQuantizationOffset(),
1827  biasV,
1828  biasDesc.GetQuantizationScale(),
1829  biasDesc.GetQuantizationOffset(),
1830  outputWidth,
1831  outputHeight);
1832  }
1833 
1834  LayerTestResult<T, 4> ret(outputTensorInfo);
1835  std::vector<T> outputImage = originalOutputImage;
1836  if (layout == armnn::DataLayout::NHWC)
1837  {
1838  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
1839  originalOutputImage.data(), outputImage.data(), sizeof(T));
1840  }
1841 
1842  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1843 
1844  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
1845  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
1846 
1847  armnn::DepthwiseConvolution2dQueueDescriptor data;
1848  armnn::WorkloadInfo info;
1849  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1850  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1851 
1852  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1853  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1854 
1855  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1856  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1857 
1858  data.m_Weight = &weightsTensor;
1859  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1860  data.m_Parameters.m_StrideX = 2;
1861  data.m_Parameters.m_StrideY = 1;
1862  data.m_Parameters.m_PadLeft = 0;
1863  data.m_Parameters.m_PadRight = 0;
1864  data.m_Parameters.m_PadTop = 1;
1865  data.m_Parameters.m_PadBottom = 1;
1866  data.m_Parameters.m_BiasEnabled = biasEnabled;
1867  data.m_Parameters.m_DataLayout = layout;
1868 
1869  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1870  inputHandle->Allocate();
1871  outputHandle->Allocate();
1872 
1873  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1874 
1875  ExecuteWorkload(*workload, memoryManager);
1876 
1877  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1878 
1879  return ret;
1880 }
1881 
1882 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1883  typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1884 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1885  armnn::IWorkloadFactory& workloadFactory,
1886  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1887  const boost::multi_array<T, 4>& originalInput,
1888  const boost::multi_array<T, 4>& originalKernel,
1889  const boost::multi_array<B, 1>& bias,
1890  const boost::multi_array<T, 4>& originalOutputExpected,
1891  float qScale,
1892  int32_t qOffset,
1893  const armnn::DataLayout layout,
1894  uint32_t padLeft = 0,
1895  uint32_t padTop = 0,
1896  uint32_t padRight = 0,
1897  uint32_t padBottom = 0,
1898  uint32_t strideX = 1,
1899  uint32_t strideY = 1,
1900  uint32_t dilationX = 1,
1901  uint32_t dilationY = 1)
1902 {
1903  unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
1904  unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
1905  unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
1906  unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
1907 
1908  unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
1909  unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
1910  unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
1911  unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
1912 
1913  unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
1914  unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
1915  unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
1916  unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
1917 
1918  bool biasEnabled = bias.size() > 0;
1919 
1920  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
1921  BOOST_ASSERT(inputNum == 1);
1922  BOOST_ASSERT(outputNum == 1);
1923 
1924  // If a bias is used, its size must equal the number of output channels.
1925  BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
1926 
1927 
1928  // Note these tensors will use two (identical) batches.
1929  armnn::TensorInfo inputTensorInfo =
1930  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1931  armnn::TensorInfo outputTensorInfo =
1932  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1933 
1934  // For depthwise convolution the kernel is always in NCHW layout, regardless of the input/output layout.
1935  armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1936 
1937  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1938 
1939  // Set quantization parameters if the requested type is a quantized type.
1940  if(armnn::IsQuantizedType<T>())
1941  {
1942  inputTensorInfo.SetQuantizationScale(qScale);
1943  inputTensorInfo.SetQuantizationOffset(qOffset);
1944  outputTensorInfo.SetQuantizationScale(qScale);
1945  outputTensorInfo.SetQuantizationOffset(qOffset);
1946  kernelDesc.SetQuantizationScale(qScale);
1947  kernelDesc.SetQuantizationOffset(qOffset);
1948  biasDesc.SetQuantizationScale(qScale*qScale);
1949  biasDesc.SetQuantizationOffset(0);
1950  }
1951 
1952  LayerTestResult<T, 4> ret(outputTensorInfo);
1953 
1954  // Construct input data
1955  std::vector<T> input;
1956  input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
1957  std::vector<T> inputData;
1958  inputData.insert(inputData.end(), input.begin(), input.end());
1959  inputData.insert(inputData.end(), input.begin(), input.end());
1960 
1961  // If the requested layout is NHWC, permute the input data at this point.
1962  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1963  if (layout == armnn::DataLayout::NHWC)
1964  {
1965  std::vector<T> tmp(inputData.size());
1966  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1967  inputData = tmp;
1968  }
1969 
1970  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1971 
1972  std::vector<T> output;
1973  output.assign(originalOutputExpected.data(),
1974  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
1975 
1976  // Apply bias to output data if it is enabled.
1977  if(biasEnabled)
1978  {
1979  std::vector<T> biasV;
1980  biasV.assign(bias.data(), bias.data() + outputChannels);
1981  ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1982  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1983  outputWidth, outputHeight);
1984  }
1985 
1986  // Construct expected output data
1987  std::vector<T> outputData;
1988  outputData.insert(outputData.end(), output.begin(), output.end());
1989  outputData.insert(outputData.end(), output.begin(), output.end());
1990 
1991  // If the requested layout is NHWC, permute the expected output at this point.
1992  if (layout == armnn::DataLayout::NHWC)
1993  {
1994  std::vector<T> tmp(outputData.size());
1995  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1996  outputData = tmp;
1997  }
1998  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1999 
2000  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2001  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2002 
2003  armnn::DepthwiseConvolution2dQueueDescriptor data;
2004  armnn::WorkloadInfo info;
2005  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2006  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2007 
2008  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2009  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2010 
2011  if(biasEnabled)
2012  {
2013  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2014  }
2015 
2016  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2017  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2018 
2019  data.m_Weight = &weightsTensor;
2020  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
2021  data.m_Parameters.m_StrideX = strideX;
2022  data.m_Parameters.m_StrideY = strideY;
2023  data.m_Parameters.m_PadLeft = padLeft;
2024  data.m_Parameters.m_PadRight = padRight;
2025  data.m_Parameters.m_PadTop = padTop;
2026  data.m_Parameters.m_PadBottom = padBottom;
2027  data.m_Parameters.m_BiasEnabled = biasEnabled;
2028  data.m_Parameters.m_DataLayout = layout;
2029  data.m_Parameters.m_DilationX = dilationX;
2030  data.m_Parameters.m_DilationY = dilationY;
2031 
2032  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2033  inputHandle->Allocate();
2034  outputHandle->Allocate();
2035 
2036  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2037 
2038  ExecuteWorkload(*workload, memoryManager);
2039 
2040  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2041 
2042  return ret;
2043 }
2044 
2045 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2046  typename T = armnn::ResolveType<ArmnnType>>
2047 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2048  armnn::IWorkloadFactory& workloadFactory,
2049  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2050  float qScale,
2051  int32_t qOffset,
2052  bool biasEnabled,
2053  const armnn::DataLayout layout)
2054 {
2055  // Use a single-batch 2-channel 5x5 image as input.
2056  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2057  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2058  QuantizedVector<T>({
2059  0, 1, 2, 3, 4,
2060  5, 6, 7, 8, 9,
2061  10, 11, 12, 13, 14,
2062  15, 16, 17, 18, 19,
2063  20, 21, 22, 23, 24,
2064 
2065  25, 26, 27, 28, 29,
2066  30, 31, 32, 33, 34,
2067  35, 36, 37, 38, 39,
2068  40, 41, 42, 43, 44,
2069  45, 46, 47, 48, 49
2070  },
2071  inputTensorInfo.GetQuantizationScale(),
2072  inputTensorInfo.GetQuantizationOffset())));
2073 
2074  // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2075  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2076  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2077  QuantizedVector<T>({
2078  32, 31, 30, 29,
2079  28, 27, 26, 25,
2080  24, 23, 22, 21,
2081  20, 19, 18, 17,
2082 
2083  16, 15, 14, 13,
2084  12, 11, 10, 9,
2085  8, 7, 6, 5,
2086  4, 3, 2, 1
2087  },
2088  kernelTensorInfo.GetQuantizationScale(),
2089  kernelTensorInfo.GetQuantizationOffset())));
2090 
2091  // Expected output is 1 batch of a 2-channel 5x5 image.
2092  // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
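     // A rough way to reproduce these numbers (an assumption, not recorded in the original file):
     // pad the NHWC-transposed input explicitly with tf.pad (1 above/left, 2 below/right) and run
     // tf.nn.depthwise_conv2d with unit strides and padding='VALID'.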
2093  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2094  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2095  QuantizedVector<T>({
2096  1062, 1580, 1850, 1530, 1117,
2097  2140, 3108, 3500, 2842, 2042,
2098  3580, 5068, 5460, 4342, 3062,
2099  3618, 5072, 5390, 4248, 2971,
2100  3074, 4282, 4510, 3533, 2457,
2101 
2102  1550, 2284, 2362, 1955, 1428,
2103  2910, 4206, 4342, 3528, 2536,
2104  3390, 4886, 5022, 4068, 2916,
2105  3566, 5056, 5182, 4133, 2922,
2106  3100, 4352, 4452, 3517, 2465
2107  },
2108  outputTensorInfo.GetQuantizationScale(),
2109  outputTensorInfo.GetQuantizationOffset())));
2110 
2111  return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2112  workloadFactory,
2113  memoryManager,
2114  input,
2115  kernel,
2116  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2117  expectedOutput,
2118  qScale,
2119  qOffset,
2120  layout,
2121  1, // Padding left.
2122  1, // Padding top.
2123  2, // Padding right.
2124  2, // Padding bottom.
2125  1, // strideX
2126  1); // strideY
2127 }
2128 
2129 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2130  typename T = armnn::ResolveType<ArmnnType>>
2131 LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2132  armnn::IWorkloadFactory& workloadFactory,
2133  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2134  float qScale,
2135  int32_t qOffset,
2136  bool biasEnabled)
2137 {
2138  auto layout = armnn::DataLayout::NHWC;
2139 
2140  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2141  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2142  QuantizedVector<T>({
2143  0, 1, 2, 3, 4,
2144  5, 6, 7, 8, 9,
2145  10, 11, 12, 13, 14,
2146  15, 16, 17, 18, 19,
2147  20, 21, 22, 23, 24,
2148 
2149  25, 26, 27, 28, 29,
2150  30, 31, 32, 33, 34,
2151  35, 36, 37, 38, 39,
2152  40, 41, 42, 43, 44,
2153  45, 46, 47, 48, 49
2154  },
2155  inputTensorInfo.GetQuantizationScale(),
2156  inputTensorInfo.GetQuantizationOffset())));
2157 
2158  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2159  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2160  QuantizedVector<T>({
2161  32, 31, 30, 29,
2162  28, 27, 26, 25,
2163  24, 23, 22, 21,
2164  20, 19, 18, 17,
2165 
2166  16, 15, 14, 13,
2167  12, 11, 10, 9,
2168  8, 7, 6, 5,
2169  4, 3, 2, 1
2170  },
2171  kernelTensorInfo.GetQuantizationScale(),
2172  kernelTensorInfo.GetQuantizationOffset())));
2173 
2174  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2175  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2176  QuantizedVector<T>({
2177  1062, 1580, 1850, 1530, 1117,
2178  2140, 3108, 3500, 2842, 2042,
2179  3580, 5068, 5460, 4342, 3062,
2180  3618, 5072, 5390, 4248, 2971,
2181  3074, 4282, 4510, 3533, 2457,
2182 
2183  1550, 2284, 2362, 1955, 1428,
2184  2910, 4206, 4342, 3528, 2536,
2185  3390, 4886, 5022, 4068, 2916,
2186  3566, 5056, 5182, 4133, 2922,
2187  3100, 4352, 4452, 3517, 2465
2188  },
2189  outputTensorInfo.GetQuantizationScale(),
2190  outputTensorInfo.GetQuantizationOffset())));
2191 
2192  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2193  workloadFactory,
2194  memoryManager,
2195  input,
2196  kernel,
2197  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2198  expectedOutput,
2199  qScale,
2200  qOffset,
2201  layout,
2202  1, // Padding left.
2203  1, // Padding top.
2204  2, // Padding right.
2205  2, // Padding bottom.
2206  1, // strideX
2207  1); // strideY
2208 }
2209 
2210 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2211  typename T = armnn::ResolveType<ArmnnType>>
2212 LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2213  armnn::IWorkloadFactory& workloadFactory,
2214  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2215  float qScale,
2216  int32_t qOffset,
2217  bool biasEnabled)
2218 {
2219  auto layout = armnn::DataLayout::NHWC;
2220 
2221  armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2222  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2223  QuantizedVector<T>({
2224  0, 0, 0, 0, 0, 0, 0, 0, 0,
2225  0, 0, 0, 0, 0, 0, 0, 0, 0,
2226  0, 0, 0, 0, 0, 0, 0, 0, 0,
2227  0, 0, 0, 1, 1, 1, 0, 0, 0,
2228  0, 0, 0, 1, 1, 1, 0, 0, 0,
2229  0, 0, 0, 1, 1, 1, 0, 0, 0,
2230  0, 0, 0, 0, 0, 0, 0, 0, 0,
2231  0, 0, 0, 0, 0, 0, 0, 0, 0,
2232  0, 0, 0, 0, 0, 0, 0, 0, 0
2233  },
2234  inputTensorInfo.GetQuantizationScale(),
2235  inputTensorInfo.GetQuantizationOffset())));
2236 
2237  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2238  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2239  QuantizedVector<T>({
2240  1, 2, 3,
2241  4, 5, 6,
2242  7, 8, 9
2243  },
2244  kernelTensorInfo.GetQuantizationScale(),
2245  kernelTensorInfo.GetQuantizationOffset())));
2246 
2247  uint32_t padLeft = 0;
2248  uint32_t padTop = 0;
2249  uint32_t padRight = 0;
2250  uint32_t padBottom = 0;
2251  uint32_t strideX = 1;
2252  uint32_t strideY = 1;
2253  uint32_t dilationX = 3;
2254  uint32_t dilationY = 3;
2255 
2256  // Since the dilation rate is 3, the 9x9 input produces a 3x3 output in which every element is 5.
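     // Worked through: with dilation 3 the effective kernel size is 3 + (3 - 1) * (3 - 1) = 7, giving
     // an output of (9 - 7) / 1 + 1 = 3 per spatial dimension. For every output position only the
     // centre tap of the dilated kernel lands inside the 3x3 block of 1s, so each output element is
     // the middle kernel value, 5.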
2257  armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2258  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2259  QuantizedVector<T>({
2260  5, 5, 5,
2261  5, 5, 5,
2262  5, 5, 5
2263  },
2264  outputTensorInfo.GetQuantizationScale(),
2265  outputTensorInfo.GetQuantizationOffset())));
2266 
2267  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2268  workloadFactory,
2269  memoryManager,
2270  input,
2271  kernel,
2272  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2273  expectedOutput,
2274  qScale,
2275  qOffset,
2276  layout,
2277  padLeft,
2278  padTop,
2279  padRight,
2280  padBottom,
2281  strideX,
2282  strideY,
2283  dilationX,
2284  dilationY);
2285 }
2286 
2287 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2288 LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2289  armnn::IWorkloadFactory& workloadFactory,
2290  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2291  const std::vector<float>& inputNoQuantizedValues,
2292  armnn::TensorInfo& inputTensorInfo,
2293  const std::vector<float>& kernelNoQuantizedValues,
2294  armnn::TensorInfo& kernelTensorInfo,
2295  const std::vector<float>& outputExpectedNoQuantizedValues,
2296  armnn::TensorInfo& outputTensorInfo,
2297  uint32_t dilationX,
2298  uint32_t dilationY,
2299  const armnn::DataLayout layout = armnn::DataLayout::NCHW,
2300  bool biasEnabled = false)
2301 {
2302  float qScale;
2303  int32_t qOffset;
2304  switch (ArmnnType)
2305  {
2306  case armnn::DataType::QAsymmU8:
2307  {
2308  qScale = 0.1f;
2309  qOffset = 128;
2310  break;
2311  }
2312  case armnn::DataType::QSymmS16:
2313  {
2314  qScale = 0.1f;
2315  qOffset = 0;
2316  break;
2317  }
2319  default:
2320  {
2321  qScale = 0.f;
2322  qOffset = 0;
2323  break;
2324  }
2325  }
2326 
2327  inputTensorInfo.SetQuantizationScale(qScale);
2328  inputTensorInfo.SetQuantizationOffset(qOffset);
2329  kernelTensorInfo.SetQuantizationScale(qScale);
2330  kernelTensorInfo.SetQuantizationOffset(qOffset);
2331  outputTensorInfo.SetQuantizationScale(qScale);
2332  outputTensorInfo.SetQuantizationOffset(qOffset);
2333 
2334  auto input = MakeTensor<T, 4>(inputTensorInfo,
2335  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2336  inputTensorInfo.GetQuantizationScale(),
2337  inputTensorInfo.GetQuantizationOffset())));
2338  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2339  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2340  kernelTensorInfo.GetQuantizationScale(),
2341  kernelTensorInfo.GetQuantizationOffset())));
2342  auto expectedOutput =
2343  MakeTensor<T, 4>(outputTensorInfo,
2344  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2345  outputTensorInfo.GetQuantizationScale(),
2346  outputTensorInfo.GetQuantizationOffset())));
2347 
2348  uint32_t padLeft = 0;
2349  uint32_t padTop = 0;
2350  uint32_t padRight = 0;
2351  uint32_t padBottom = 0;
2352  uint32_t strideX = 1;
2353  uint32_t strideY = 1;
2354 
2355  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2356  workloadFactory,
2357  memoryManager,
2358  input,
2359  kernel,
2360  GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2361  expectedOutput,
2362  qScale,
2363  qOffset,
2364  layout,
2365  padLeft,
2366  padTop,
2367  padRight,
2368  padBottom,
2369  strideX,
2370  strideY,
2371  dilationX,
2372  dilationY);
2373 }
2374 
2375 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2376 LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2377  armnn::IWorkloadFactory& workloadFactory,
2378  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2379  bool biasEnabled,
2380  const armnn::DataLayout layout)
2381 {
2382  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2383  std::vector<float> inputNoQuantizedValues =
2384  {
2385  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2386  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2387  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2388  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2389  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2390  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2391  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2392  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2393  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2394  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2395  };
2396 
2397  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2398  std::vector<float> kernelNoQuantizedValues =
2399  {
2400  1, 2, 3,
2401  4, 5, 6,
2402  7, 8, 9
2403  };
2404 
2405  // Since the dilation rate is 3, the 3x3 kernel is effectively dilated to 7x7,
2406  // so the output is 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
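     // In general the effective (dilated) kernel size is K_eff = K + (K - 1) * (dilation - 1),
     // i.e. 3 + 2 * 2 = 7 here, which is the value substituted into the formula above.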
2407  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2408  std::vector<float> outputExpectedNoQuantizedValues =
2409  {
2410  6., 5., 5., 5.,
2411  6., 5., 5., 5.,
2412  6., 5., 5., 5.,
2413  3., 2., 2., 2.
2414  };
2415 
2416  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2417  workloadFactory,
2418  memoryManager,
2419  inputNoQuantizedValues,
2420  inputTensorInfo,
2421  kernelNoQuantizedValues,
2422  kernelTensorInfo,
2423  outputExpectedNoQuantizedValues,
2424  outputTensorInfo,
2425  3,
2426  3,
2427  layout,
2428  biasEnabled);
2429 }
2430 
2431 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2432 LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2433  armnn::IWorkloadFactory& workloadFactory,
2434  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2435  bool biasEnabled,
2436  const armnn::DataLayout layout)
2437 {
2438  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2439  std::vector<float> inputNoQuantizedValues =
2440  {
2441  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2442  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2443  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2445  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2446  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2447  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2448  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2449  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2450  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2451 
2452  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2453  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2454  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2455  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2456  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2457  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2458  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2459  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2460  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2461  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2462  };
2463 
2464  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2465  std::vector<float> kernelNoQuantizedValues =
2466  {
2467  1, 2, 3,
2468  4, 5, 6,
2469  7, 8, 9,
2470 
2471  1, 2, 3,
2472  4, 5, 6,
2473  7, 8, 9
2474  };
2475 
2476  // Since the dilation rate is 3, the 3x3 kernel is effectively dilated to 7x7,
2477  // so the output is 2x4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4 per spatial dimension.
2478  armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2479  std::vector<float> outputExpectedNoQuantizedValues =
2480  {
2481  6., 5., 5., 5.,
2482  6., 5., 5., 5.,
2483  6., 5., 5., 5.,
2484  3., 2., 2., 2.,
2485 
2486  6., 5., 5., 5.,
2487  6., 5., 5., 5.,
2488  6., 5., 5., 5.,
2489  3., 2., 2., 2.
2490  };
2491 
2492  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2493  workloadFactory,
2494  memoryManager,
2495  inputNoQuantizedValues,
2496  inputTensorInfo,
2497  kernelNoQuantizedValues,
2498  kernelTensorInfo,
2499  outputExpectedNoQuantizedValues,
2500  outputTensorInfo,
2501  3,
2502  3,
2503  layout,
2504  biasEnabled);
2505 }
2506 
2507 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2508 LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2509  armnn::IWorkloadFactory& workloadFactory,
2510  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2511  bool biasEnabled,
2512  const armnn::DataLayout layout)
2513 {
2514  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2515  std::vector<float> inputNoQuantizedValues =
2516  {
2517  10.0, 10.0, 10.0,
2518  10.0, 10.0, 10.0,
2519  10.0, 10.0, 10.0,
2520 
2521  21.0, 22.0, 23.0,
2522  24.0, 25.0, 26.0,
2523  27.0, 28.0, 29.0
2524  };
2525 
2526  armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
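     // The kernel shape here is { depthMultiplier, inputChannels, H, W }, so a depth multiplier of 4
     // on 2 input channels yields 4 * 2 = 8 output channels, matching the { 1, 8, 2, 2 } output below.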
2527 
2528  std::vector<float> kernelNoQuantizedValues =
2529  {
2530  0.25f, 0.25f,
2531  0.25f, 0.25f,
2532 
2533  0.25f, 0.25f,
2534  0.25f, 0.25f,
2535 
2536  0.0f , 0.0f,
2537  0.0f , 0.1f,
2538 
2539  0.0f , 0.0f,
2540  0.0f , 0.1f,
2541 
2542  0.2f , 0.0f,
2543  0.0f , 0.0f,
2544 
2545  0.2f , 0.0f,
2546  0.0f , 0.0f,
2547 
2548  0.0f , 0.3f,
2549  0.0f , 0.0f,
2550 
2551  0.0f , 0.3f,
2552  0.0f , 0.0f
2553  };
2554 
2555  armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2556  std::vector<float> outputExpectedNoQuantizedValues =
2557  {
2558  10.f, 10.f,
2559  10.f, 10.f,
2560 
2561  1.f, 1.f,
2562  1.f, 1.f,
2563 
2564  2.f, 2.f,
2565  2.f, 2.f,
2566 
2567  3.f, 3.f,
2568  3.f, 3.f,
2569 
2570  23.f, 24.f,
2571  26.f, 27.f,
2572 
2573  2.5f, 2.6000001f,
2574  2.8f, 2.9f,
2575 
2576  4.2000003f, 4.4f,
2577  4.8f, 5.f,
2578 
2579  6.6000004f, 6.9f,
2580  7.5000005f, 7.8f
2581  };
2582 
2583 
2584  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2585  workloadFactory,
2586  memoryManager,
2587  inputNoQuantizedValues,
2588  inputTensorInfo,
2589  kernelNoQuantizedValues,
2590  kernelTensorInfo,
2591  outputExpectedNoQuantizedValues,
2592  outputTensorInfo,
2593  1,
2594  1,
2595  layout,
2596  biasEnabled);
2597 }
2598 
2599 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2600 LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2601  armnn::IWorkloadFactory& workloadFactory,
2602  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2603  bool biasEnabled,
2604  const armnn::DataLayout layout)
2605 {
2606  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2607  std::vector<float> inputNoQuantizedValues =
2608  {
2609  10.0, 10.0, 10.0,
2610  10.0, 10.0, 10.0,
2611  10.0, 10.0, 10.0,
2612 
2613  21.0, 22.0, 23.0,
2614  24.0, 25.0, 26.0,
2615  27.0, 28.0, 29.0
2616  };
2617 
2618  armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
2619 
2620  std::vector<float> kernelNoQuantizedValues =
2621  {
2622  0.25f, 0.25f,
2623  0.25f, 0.25f,
2624 
2625  0.2f , 0.0f,
2626  0.0f , 0.0f,
2627 
2628  0.0f , 0.0f,
2629  0.0f , 0.1f,
2630 
2631  0.0f , 0.3f,
2632  0.0f , 0.0f
2633 
2634  };
2635 
2636  armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2637  std::vector<float> outputExpectedNoQuantizedValues =
2638  {
2639  10.f, 10.f,
2640  10.f, 10.f,
2641 
2642  1.f, 1.f,
2643  1.f, 1.f,
2644 
2645  4.2000003f, 4.4f,
2646  4.8f, 5.f,
2647 
2648  6.6000004f, 6.9f,
2649  7.5000005f, 7.8f
2650  };
2651 
2652 
2653  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2654  workloadFactory,
2655  memoryManager,
2656  inputNoQuantizedValues,
2657  inputTensorInfo,
2658  kernelNoQuantizedValues,
2659  kernelTensorInfo,
2660  outputExpectedNoQuantizedValues,
2661  outputTensorInfo,
2662  1,
2663  1,
2664  layout,
2665  biasEnabled);
2666 }
2667 
2668 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2669 LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
2670  armnn::IWorkloadFactory& workloadFactory,
2671  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2672  armnn::IWorkloadFactory& refWorkloadFactory,
2673  const armnnUtils::DataLayoutIndexed& layout)
2674 {
2675  unsigned int inputHeight = 8;
2676  unsigned int inputWidth = 16;
2677  unsigned int inputChannels = 3;
2678  unsigned int inputNum = 5;
2679 
2680  unsigned int kernelHeight = 3;
2681  unsigned int kernelWidth = 3;
2682  unsigned int channelMultiplier = 1;
2683 
2684  unsigned int strideX = 2;
2685  unsigned int strideY = 3;
2686  unsigned int padX = 1;
2687  unsigned int padY = 1;
2688 
2689  unsigned int outputNum = inputNum;
2690  unsigned int outputChannels = inputChannels * channelMultiplier;
2691  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2692  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
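     // For the values above (integer division): outputHeight = (8 + 2*1 - 3 + 3) / 3 = 3 and
     // outputWidth = (16 + 2*1 - 3 + 2) / 2 = 8.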
2693 
2694  armnn::TensorInfo inputTensorInfo;
2695  armnn::TensorInfo outputTensorInfo;
2696  armnn::TensorInfo kernelDesc;
2697  armnn::TensorInfo biasDesc;
2698 
2699 
2700  std::vector<unsigned int> inputShape;
2701  std::vector<unsigned int> outputShape;
2702  std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2703  std::vector<unsigned int> biasShape{ outputChannels };
2704  switch (layout.GetDataLayout())
2705  {
2706  case armnn::DataLayout::NCHW:
2707  inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2708  outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2709  break;
2710  case armnn::DataLayout::NHWC:
2711  inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2712  outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2713  break;
2714  default:
2715  throw armnn::InvalidArgumentException("unknown data layout ["
2716  + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2717  }
2718 
2719  float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2720  float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2721  int32_t qOffset = 0;
2722 
2723  inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2724  outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2725  kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2726  biasDesc = armnn::TensorInfo(
2727  1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2728 
2729  LayerTestResult<T, 4> ret(outputTensorInfo);
2730 
2731  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
2732  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
2733  auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
2734  biasDesc, 1028, 0.0f, 255.0f);
2735 
2736  std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2737  std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2738 
2739  armnn::DepthwiseConvolution2dQueueDescriptor data;
2740  armnn::WorkloadInfo info;
2741  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2742  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2743 
2744  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2745  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2746 
2747  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2748  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2749  data.m_Weight = &weightsTensor;
2750  data.m_Bias = &biasTensor;
2751  data.m_Parameters.m_StrideX = strideX;
2752  data.m_Parameters.m_StrideY = strideY;
2753  data.m_Parameters.m_PadLeft = padX;
2754  data.m_Parameters.m_PadRight = padX;
2755  data.m_Parameters.m_PadTop = padY;
2756  data.m_Parameters.m_PadBottom = padY;
2757  data.m_Parameters.m_BiasEnabled = true;
2758  data.m_Parameters.m_DataLayout = layout.GetDataLayout();
2759 
2760  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
2761  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
2762 
2763  armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
2764  armnn::WorkloadInfo refInfo = info;
2765  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
2766  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
2767 
2768  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2769  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
2770 
2771  outputHandleRef->Allocate();
2772  inputHandleRef->Allocate();
2773 
2774  inputHandle->Allocate();
2775  outputHandle->Allocate();
2776 
2777  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2778  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
2779 
2780  ExecuteWorkload(*workload, memoryManager);
2781 
2782  workloadRef->PostAllocationConfigure();
2783  workloadRef->Execute();
2784 
2785  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2786  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
2787 
2788  return ret;
2789 }
2790 
2791 //
2792 // Explicit template instantiations
2793 //
2795 Convolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
2798  bool,
2800 
2802 Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2803  armnn::IWorkloadFactory&,
2804  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2805  bool,
2807 
2809 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2810  armnn::IWorkloadFactory&,
2811  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2812  bool,
2814 
2816 Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2817  armnn::IWorkloadFactory&,
2818  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2819  bool,
2821 
2822 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2823 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2824  armnn::IWorkloadFactory&,
2825  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2826  bool,
2828 
2830 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
2831  armnn::IWorkloadFactory&,
2832  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2833  bool,
2835 
2836 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2837 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2838  armnn::IWorkloadFactory&,
2839  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2840  bool,
2842 
2843 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2844 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2845  armnn::IWorkloadFactory&,
2846  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2847  bool,
2849 
2850 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
2851 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
2852  armnn::IWorkloadFactory &workloadFactory,
2853  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2854  bool biasEnabled,
2855  const armnn::DataLayout layout);
2856 
2857 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2858 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2859  armnn::IWorkloadFactory &workloadFactory,
2860  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2861  bool biasEnabled,
2862  const armnn::DataLayout layout);
2863 
2864 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2865 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2866  armnn::IWorkloadFactory &workloadFactory,
2867  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2868  bool biasEnabled,
2869  const armnn::DataLayout layout);
2870 
2871 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2872 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2873  armnn::IWorkloadFactory &workloadFactory,
2874  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2875  bool biasEnabled,
2876  const armnn::DataLayout layout);
2877 
2878 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
2879 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
2880  armnn::IWorkloadFactory&,
2881  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2882  bool,
2884 
2885 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2886 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2887  armnn::IWorkloadFactory&,
2888  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2889  bool,
2891 
2892 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2893 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2894  armnn::IWorkloadFactory&,
2895  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2896  bool,
2898 
2899 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2900 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2901  armnn::IWorkloadFactory&,
2902  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2903  bool,
2905 
2906 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
2907 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
2908  armnn::IWorkloadFactory&,
2909  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2910  bool,
2912 
2913 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2914 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2915  armnn::IWorkloadFactory&,
2916  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2917  bool,
2919 
2920 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
2921 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2922  armnn::IWorkloadFactory&,
2923  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2924  bool,
2926 
2927 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
2928 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
2929  armnn::IWorkloadFactory&,
2930  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
2931  bool,
2933 
2934 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
2935 DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
2936  armnn::IWorkloadFactory &workloadFactory,
2937  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2938  bool biasEnabled,
2939  const armnn::DataLayout layout);
2940 
2941 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2942 DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2943  armnn::IWorkloadFactory &workloadFactory,
2944  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2945  bool biasEnabled,
2946  const armnn::DataLayout layout);
2947 
2948 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
2949 DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
2950  armnn::IWorkloadFactory &workloadFactory,
2951  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2952  bool biasEnabled,
2953  const armnn::DataLayout layout);
2954 
2955 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
2956 DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
2957  armnn::IWorkloadFactory &workloadFactory,
2958  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
2959  bool biasEnabled,
2960  const armnn::DataLayout layout);
2961 
2962 //
2963 // Implementation functions
2964 //
2965 
2967  armnn::IWorkloadFactory& workloadFactory,
2968  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2969  bool biasEnabled,
2970  const armnn::DataLayout layout)
2971 {
2972  return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
2973  workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
2974 }
2975 
2977  armnn::IWorkloadFactory& workloadFactory,
2978  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2979  bool biasEnabled,
2980  const armnn::DataLayout layout)
2981 {
2982  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
2983  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
2984 }
2985 
2987  armnn::IWorkloadFactory& workloadFactory,
2988  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2989  bool biasEnabled,
2990  const armnn::DataLayout layout)
2991 {
2992  return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
2993  workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
2994 }
2995 
2997  armnn::IWorkloadFactory& workloadFactory,
2998  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2999  bool biasEnabled)
3000 {
3001  return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
3002  workloadFactory,
3003  memoryManager,
3004  0.f,
3005  0,
3006  biasEnabled,
3007  armnn::DataLayout::NHWC);
3008 }
3009 
3011  armnn::IWorkloadFactory& workloadFactory,
3012  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3013  bool biasEnabled,
3014  const armnn::DataLayout layout)
3015 {
3016  return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
3017  workloadFactory,
3018  memoryManager,
3019  0.f,
3020  0,
3021  biasEnabled,
3022  layout);
3023 }
3024 
3026  armnn::IWorkloadFactory& workloadFactory,
3027  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3028  bool biasEnabled,
3029  const armnn::DataLayout layout)
3030 {
3031  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3032  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3033 }
3034 
3036  armnn::IWorkloadFactory& workloadFactory,
3037  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3038  bool biasEnabled,
3039  const armnn::DataLayout layout)
3040 {
3041  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3042  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3043 }
3044 
3046  armnn::IWorkloadFactory& workloadFactory,
3047  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3048  bool biasEnabled,
3049  const armnn::DataLayout layout)
3050 {
3051  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3052  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3053 }
3054 
3056  armnn::IWorkloadFactory& workloadFactory,
3057  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3058  armnn::DataLayout layout)
3059 {
3060  return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3061  workloadFactory, memoryManager, layout, 0.0f, 0);
3062 }
3063 
3065  armnn::IWorkloadFactory& workloadFactory,
3066  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3067  armnn::DataLayout layout)
3068 {
3070  <armnn::DataType::Float32, armnn::DataType::Float32>(
3071  workloadFactory, memoryManager, layout, 0.0f, 0);
3072 }
3073 
3075  armnn::IWorkloadFactory& workloadFactory,
3076  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3077  bool biasEnabled)
3078 {
3079  return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3080  workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
3081 }
3082 
3084  armnn::IWorkloadFactory& workloadFactory,
3085  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3086  bool biasEnabled)
3087 {
3088  return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3089  workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
3090 }
3091 
3093  armnn::IWorkloadFactory& workloadFactory,
3094  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3095  const armnn::DataLayout layout)
3096 {
3097  using namespace armnn;
3098 
3099  const DataType inputType = DataType::QAsymmU8;
3100  const DataType kernelType = DataType::QSymmS8;
3101  const DataType biasType = DataType::Signed32;
3102 
3103  TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
3104  TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
3105 
3106  const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
3107  constexpr unsigned int quantDimension = 0;
3108 
3109  TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
3110 
3111  const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
3112  TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
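     // Per-axis quantization: the kernel carries one scale per output channel (quantDimension 0).
     // Each bias scale is the input scale multiplied by the matching kernel scale:
     // 0.5 * { 0.5, 0.75, 1.0 } = { 0.25, 0.375, 0.5 }.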
3113 
3114  std::vector<uint8_t> inputData =
3115  {
3116  138, 108, 138, 108, 138, 108
3117  };
3118 
3119  std::vector<int8_t> kernelData =
3120  {
3121  1, 2, 1, 2, 1, 2
3122  };
3123 
3124  std::vector<int32_t> biasData =
3125  {
3126  4, 4, 4
3127  };
3128 
3129  std::vector<uint8_t> expectedOutputData =
3130  {
3131  121, 118, 115, 121, 118, 115, 121, 118, 115
3132  };
3133 
3134  if (layout == DataLayout::NCHW)
3135  {
3136  PermuteTensorNhwcToNchw(inputInfo, inputData);
3137  PermuteTensorNhwcToNchw(kernelInfo, kernelData);
3138  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3139  }
3140 
3141  Convolution2dDescriptor descriptor;
3142  descriptor.m_StrideX = 1;
3143  descriptor.m_StrideY = 1;
3144  descriptor.m_PadLeft = 0;
3145  descriptor.m_PadRight = 0;
3146  descriptor.m_PadTop = 0;
3147  descriptor.m_PadBottom = 0;
3148  descriptor.m_BiasEnabled = true;
3149  descriptor.m_DataLayout = layout;
3150 
3151  std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
3152  std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
3153 
3154  WorkloadInfo workloadInfo;
3155  ScopedCpuTensorHandle weightTensor(kernelInfo);
3156  ScopedCpuTensorHandle biasTensor(biasInfo);
3157 
3158  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3159  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3160 
3161  Convolution2dQueueDescriptor queueDescriptor;
3162  queueDescriptor.m_Parameters = descriptor;
3163  queueDescriptor.m_Weight = &weightTensor;
3164  queueDescriptor.m_Bias = &biasTensor;
3165 
3166  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3167  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3168 
3169  std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
3170  inputHandle->Allocate();
3171  outputHandle->Allocate();
3172 
3173  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3174 
3175  ExecuteWorkload(*workload, memoryManager);
3176 
3177  LayerTestResult<uint8_t, 4> ret(outputInfo);
3178  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3179  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3180 
3181  return ret;
3182 }
3183 
3185  armnn::IWorkloadFactory& workloadFactory,
3186  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3187  armnn::IWorkloadFactory& refWorkloadFactory)
3188 {
3189  return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
3190  workloadFactory, memoryManager, refWorkloadFactory);
3191 }
3192 
3194  armnn::IWorkloadFactory& workloadFactory,
3195  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3196  bool biasEnabled,
3197  const armnn::DataLayout layout)
3198 {
3199  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3200  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3201 }
3202 
3204  armnn::IWorkloadFactory& workloadFactory,
3205  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3206  bool biasEnabled)
3207 {
3208  return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3209  workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
3210 }
3211 
3212 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
3213  armnn::IWorkloadFactory& workloadFactory,
3214  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3215  bool biasEnabled,
3216  const armnn::DataLayout layout)
3217 {
3218  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3219  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3220 }
3221 
3222 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
3223  armnn::IWorkloadFactory& workloadFactory,
3224  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3225 {
3226  armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
3227  auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
3228 
3229  std::vector<float> kernelData;
3230  std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
3231  for (unsigned int i = 0; i < 64; ++i)
3232  {
3233  kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
3234  }
3235  armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
3236  auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
3237 
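 // Every output channel applies the same 2x2 kernel { 1, -1, -1, 1 } to the same 2x2 input
 // { 1, 2, 3, 4 }, and 1*1 + (-1)*2 + (-1)*3 + 1*4 = 0, so all 64 expected outputs are zero.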
3238  std::vector<float> expectedOutputData(64, 0.f);
3239  armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
3240  auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
3241 
3242  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3243  workloadFactory,
3244  memoryManager,
3245  input,
3246  kernel,
3247  boost::multi_array<float, 1>(),
3248  expectedOutput,
3249  0.f,
3250  0,
3251  armnn::DataLayout::NCHW);
3252 }
3253 
3254 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
3255  armnn::IWorkloadFactory& workloadFactory,
3256  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3257  bool biasEnabled,
3258  const armnn::DataLayout layout)
3259 {
3260  return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3261  workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
3262 }
3263 
3264 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
3265  armnn::IWorkloadFactory& workloadFactory,
3266  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3267  bool biasEnabled,
3268  const armnn::DataLayout layout)
3269 {
3270  return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3271  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3272 }
3273 
3274 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
3275  armnn::IWorkloadFactory& workloadFactory,
3276  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3277  bool biasEnabled,
3278  const armnn::DataLayout layout)
3279 {
3280  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3281  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3282 }
3283 
3284 LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
3285  armnn::IWorkloadFactory& workloadFactory,
3286  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3287 {
3288  return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3289  workloadFactory,
3290  memoryManager,
3291  0.f,
3292  0,
3293  false);
3294 }
3295 
3296 LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
3297  armnn::IWorkloadFactory& workloadFactory,
3298  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3299  bool biasEnabled,
3300  const armnn::DataLayout layout)
3301 {
3302  return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3303  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3304 }
3305 
3306 LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
3307  armnn::IWorkloadFactory& workloadFactory,
3308  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3309  bool biasEnabled,
3310  const armnn::DataLayout layout)
3311 {
3312  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3313  workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
3314 }
3315 
3316 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
3317  armnn::IWorkloadFactory& workloadFactory,
3318  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3319  const armnn::DataLayout layout)
3320 {
3321  using namespace armnn;
3322 
3323  const DataType inputType = DataType::QAsymmU8;
3324  const DataType kernelType = DataType::QSymmS8;
3325  const DataType biasType = DataType::Signed32;
3326 
3327  TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
3328  TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
3329 
3330  const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
3331  const unsigned int quantDimension = 0;
3332  TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
3333 
3334  const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
3335  constexpr unsigned int biasQuantDimension = 0;
3336  TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
3337 
3338  std::vector<uint8_t> inputData =
3339  {
3340  129, 130,
3341  129, 130,
3342  129, 130,
3343  129, 130,
3344  129, 130,
3345  129, 130,
3346  129, 130,
3347  129, 130,
3348  129, 130
3349  };
3350 
3351  std::vector<int8_t> kernelData =
3352  {
3353  1, 1, 1, 1,
3354  1, 1, 1, 1,
3355  1, 1, 1, 1,
3356  1, 1, 1, 1
3357  };
3358 
3359  std::vector<int32_t> biasData =
3360  {
3361  4, 4, 4, 4
3362  };
3363 
3364  std::vector<uint8_t> expectedOutputData =
3365  {
3366  132, 130, 134, 131,
3367  132, 130, 134, 131,
3368  132, 130, 134, 131,
3369  132, 130, 134, 131
3370  };
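 // Informal check of the expected values above: the input dequantizes (scale 0.5, offset 128)
 // to 0.5 for channel 0 and 1.0 for channel 1, so each 2x2 window sums 2.0 or 4.0 of real input
 // under the all-ones kernel. With the depthwise output channels ordered so that the two
 // multipliers of input channel 0 come first, the per-channel kernel scales { 1.0, 0.5, 1.0, 0.5 }
 // and real biases 4 * { 0.5, 0.25, 0.5, 0.25 } = { 2, 1, 2, 1 } give real outputs
 // { 2*1+2, 2*0.5+1, 4*1+2, 4*0.5+1 } = { 4, 2, 6, 3 }, which requantize with scale 1.0 and
 // offset 128 to { 132, 130, 134, 131 } at each of the four output positions.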
3371 
3372  if (layout == DataLayout::NCHW)
3373  {
3374  PermuteTensorNhwcToNchw(inputInfo, inputData);
3375  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3376  }
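 // Note: unlike the Convolution2d per-axis test above, only the input and output tensors are
 // permuted for NCHW; the depthwise weights remain in the [M, I, H, W] order declared above
 // irrespective of the requested data layout.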
3377 
3378  DepthwiseConvolution2dDescriptor descriptor;
3379  descriptor.m_StrideX = 1;
3380  descriptor.m_StrideY = 1;
3381  descriptor.m_PadLeft = 0;
3382  descriptor.m_PadRight = 0;
3383  descriptor.m_PadTop = 0;
3384  descriptor.m_PadBottom = 0;
3385  descriptor.m_DilationX = 1;
3386  descriptor.m_DilationY = 1;
3387  descriptor.m_BiasEnabled = true;
3388  descriptor.m_DataLayout = layout;
3389 
3390  std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo);
3391  std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
3392 
3393  WorkloadInfo workloadInfo;
3394  ScopedCpuTensorHandle weightTensor(kernelInfo);
3395  ScopedCpuTensorHandle biasTensor(biasInfo);
3396 
3397  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3398  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3399 
3400  DepthwiseConvolution2dQueueDescriptor queueDescriptor;
3401  queueDescriptor.m_Parameters = descriptor;
3402  queueDescriptor.m_Weight = &weightTensor;
3403  queueDescriptor.m_Bias = &biasTensor;
3404 
3405  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3406  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3407 
3408  std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
3409  inputHandle->Allocate();
3410  outputHandle->Allocate();
3411 
3412  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3413 
3414  ExecuteWorkload(*workload, memoryManager);
3415 
3416  LayerTestResult<uint8_t, 4> ret(outputInfo);
3417 
3418  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3419  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3420 
3421  return ret;
3422 }
3423 
3424 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
3425  armnn::IWorkloadFactory& workloadFactory,
3426  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3427  armnn::IWorkloadFactory& refWorkloadFactory,
3428  const armnn::DataLayout layout)
3429 {
3430  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
3431  workloadFactory, memoryManager, refWorkloadFactory, layout);
3432 }
3433 
3434 LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
3435  armnn::IWorkloadFactory& workloadFactory,
3436  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3437  armnn::IWorkloadFactory& refWorkloadFactory,
3438  const armnn::DataLayout layout)
3439 {
3440  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
3441  workloadFactory, memoryManager, refWorkloadFactory, layout);
3442 }
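// Example usage (illustrative only, not part of this file): the test entry points above are
// normally registered by the individual backend test suites rather than called directly.
// A typical registration, assuming the ARMNN_AUTO_TEST_CASE helper used by the backend unit
// tests, would look like:
//
//     ARMNN_AUTO_TEST_CASE(Convolution2dPerAxisQuantTestNchw,
//                          Convolution2dPerAxisQuantTest,
//                          armnn::DataLayout::NCHW)
//
// Each entry point returns a LayerTestResult whose 'output' and 'outputExpected' tensors are
// then compared by the invoking test framework.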
LayerTestResult< float, 4 > DepthwiseConvolution2dDepthMul64Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager)
uint32_t m_PadBottom
Padding bottom value in the height dimension.
bool m_BiasEnabled
Enable/disable bias.
LayerTestResult< T, 4 > SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< float, 4 > CompareDepthwiseConvolution2dFloatTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, armnn::IWorkloadFactory &refWorkloadFactory, const armnn::DataLayout layout)
const ConstCpuTensorHandle * m_Bias
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
LayerTestResult< uint8_t, 4 > DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::DataLayout layout, float qScale, int32_t qOffset)
bool m_BiasEnabled
Enable/disable bias.
LayerTestResult< float, 4 > SimpleConvolution2d3x5Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
DataLayout
Definition: Types.hpp:49
const TensorShape & GetShape() const
Definition: Tensor.hpp:88
uint32_t m_PadBottom
Padding bottom value in the height dimension.
LayerTestResult< uint8_t, 4 > DepthwiseConvolution2dPerAxisQuantTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::DataLayout layout)
void ApplyBias(std::vector< T > &v, float vScale, int32_t vOffset, const std::vector< B > &bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
LayerTestResult< float, 4 > Convolution1dTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled)
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
LayerTestResult< float, 4 > Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, armnn::DataLayout layout)
LayerTestResult< uint8_t, 4 > DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_PadLeft
Padding left value in the width dimension.
LayerTestResult< T, 4 > DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled, const armnn::DataLayout layout)
boost::multi_array< T, n > outputExpected
LayerTestResult< T, 4 > CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, armnn::IWorkloadFactory &refWorkloadFactory, const armnnUtils::DataLayoutIndexed &layout)
LayerTestResult< uint8_t, 4 > SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > DepthwiseConvolution2d3x3DilationTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const std::vector< float > &inputNoQuantizedValues, armnn::TensorInfo &inputTensorInfo, const std::vector< float > &kernelNoQuantizedValues, armnn::TensorInfo &kernelTensorInfo, const std::vector< float > &outputExpectedNoQuantizedValues, armnn::TensorInfo &outputTensorInfo, uint32_t dilationX, uint32_t dilationY, armnn::DataLayout layout=armnn::DataLayout::NCHW, bool biasEnabled=false)
LayerTestResult< T, 4 > SimpleConvolution2dNhwcTestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const boost::multi_array< T, 4 > &input, const boost::multi_array< T, 4 > &kernel, const boost::multi_array< B, 1 > &bias, const boost::multi_array< T, 4 > &outputExpected, const armnn::DataLayout dataLayout, float qScale, int32_t qOffset, uint32_t padLeft=1, uint32_t padTop=1, uint32_t padRight=1, uint32_t padBottom=1, uint32_t strideX=1, uint32_t strideY=1)
LayerTestResult< T, 4 > SimpleConvolution2dTestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const boost::multi_array< T, 4 > &originalInput, const boost::multi_array< T, 4 > &originalKernel, const boost::multi_array< B, 1 > &bias, const boost::multi_array< T, 4 > &originalOutputExpected, float qScale, int32_t qOffset, const armnn::DataLayout layout=armnn::DataLayout::NCHW, uint32_t padLeft=0, uint32_t padTop=0, uint32_t padRight=0, uint32_t padBottom=0, uint32_t strideX=1, uint32_t strideY=1, uint32_t dilationX=1, uint32_t dilationY=1)
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:73
uint32_t m_PadRight
Padding right value in the width dimension.
LayerTestResult< int16_t, 4 > SimpleConvolution2d3x3QSymm16Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
Copyright (c) 2020 ARM Limited.
LayerTestResult< T, 4 > Convolution1dTestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled)
void IgnoreUnused(Ts &&...)
uint32_t m_DilationY
Dilation along y axis.
uint32_t m_DilationY
Dilation factor value for height dimension.
LayerTestResult< float, 4 > SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager)
LayerTestResult< float, 4 > DepthwiseConvolution2dDepthNhwcTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled)
LayerTestResult< T, 4 > Convolution2d3x3Dilation3x3Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
uint32_t m_PadTop
Padding top value in the height dimension.
LayerTestResult< uint8_t, 4 > Convolution2dPerAxisQuantTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::DataLayout layout)
void Permute(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Permute.cpp:121
LayerTestResult< T, 4 > Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
LayerTestResult< T, 4 > CompareConvolution2dTestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, armnn::IWorkloadFactory &refWorkloadFactory)
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
DataType
Definition: Types.hpp:32
uint32_t m_DilationX
Dilation factor value for width dimension.
uint32_t m_PadTop
Padding top value in the height dimension.
LayerTestResult< int16_t, 4 > SimpleConvolution2d3x5QSymm16Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
int32_t GetQuantizationOffset() const
Definition: Tensor.cpp:264
float GetQuantizationScale() const
Definition: Tensor.cpp:247
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
const ConstCpuTensorHandle * m_Weight
LayerTestResult< T, 4 > DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const boost::multi_array< T, 4 > &input, const boost::multi_array< T, 4 > &kernel, const boost::multi_array< B, 1 > &bias, const boost::multi_array< T, 4 > &outputExpected, float qScale, int32_t qOffset, const armnn::DataLayout layout, uint32_t padLeft=0, uint32_t padTop=0, uint32_t padRight=0, uint32_t padBottom=0, uint32_t strideX=1, uint32_t strideY=1)
LayerTestResult< T, 4 > DepthwiseConvolution2d2x3x3Dilation3x3Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > DepthwiseConvolution2dMult2Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
boost::multi_array< T, 1 > GetBias2(bool biasEnabled, float qScale)
void SetQuantizationScale(float scale)
Definition: Tensor.cpp:259
LayerTestResult< int16_t, 4 > DepthwiseConvolution2dDepthMul1Int16Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:33
LayerTestResult< float, 4 > SimpleConvolution2d3x3Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
armnn::DataLayout GetDataLayout() const
LayerTestResult< T, 4 > DepthwiseConvolution2dNhwcTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled)
void CopyDataFromITensorHandle(void *memory, const armnn::ITensorHandle *tensorHandle)
boost::multi_array< T, 1 > GetBias4(bool biasEnabled, float qScale)
LayerTestResult< T, 4 > Convolution2d2x3x3Dilation3x3Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< float, 4 > CompareConvolution2dTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, armnn::IWorkloadFactory &refWorkloadFactory)
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const =0
DataType GetBiasDataType(DataType inputDataType)
LayerTestResult< float, 4 > DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
boost::multi_array< T, n > output
LayerTestResult< float, 4 > Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, armnn::DataLayout layout)
LayerTestResult< T, 4 > Convolution2d3x3DilationTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const std::vector< float > &inputNoQuantizedValues, armnn::TensorInfo &inputTensorInfo, const std::vector< float > &kernelNoQuantizedValues, armnn::TensorInfo &kernelTensorInfo, const std::vector< float > &outputExpectedNoQuantizedValues, armnn::TensorInfo &outputTensorInfo, uint32_t dilationX, uint32_t dilationY, armnn::DataLayout layout=armnn::DataLayout::NCHW, uint32_t padLeft=0, uint32_t padTop=0, uint32_t padRight=0, uint32_t padBottom=0, uint32_t strideX=1, uint32_t strideY=1, bool biasEnabled=false)
uint32_t m_DilationX
Dilation along x axis.
boost::multi_array< T, 1 > GetBias8(bool biasEnabled, float qScale)
LayerTestResult< float, 4 > SimpleConvolution2d3x3NhwcTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled)
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
LayerTestResult< T, 4 > Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::DataLayout layout, float qScale, int32_t qOffset)
LayerTestResult< T, 4 > SimpleConvolution2d3x3Stride2x2TestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled, const armnn::DataLayout &dataLayout)
boost::multi_array< T, 1 > GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout, const armnn::DataType dataType)
Definition: TensorUtils.cpp:38
Contains information about inputs and outputs to a layer.
LayerTestResult< float, 4 > DepthwiseConvolution2dTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled, const armnn::DataLayout layout)
float SelectiveDequantize(T value, float scale, int32_t offset)
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:275
LayerTestResult< T, 4 > DepthwiseConvolution2d3x3Dilation3x3Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< uint8_t, 4 > Convolution1dUint8Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled)
LayerTestResult< T, 4 > DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< uint8_t, 4 > CompareDepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, armnn::IWorkloadFactory &refWorkloadFactory, const armnn::DataLayout layout)
LayerTestResult< uint8_t, 4 > SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > SimpleConvolution2d3x3NhwcTestCommon(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, float qScale, int32_t qOffset, bool biasEnabled, armnn::DataLayout dataLayout)
LayerTestResult< int16_t, 4 > DepthwiseConvolution2dInt16Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
virtual std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const
unsigned int GetChannelsIndex() const
LayerTestResult< float, 4 > DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< T, 4 > DepthwiseConvolution2dMult4Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
LayerTestResult< float, 4 > SimpleConvolution2d3x3Stride2x2Test(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, bool biasEnabled, const armnn::DataLayout layout)
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
uint32_t m_PadLeft
Padding left value in the width dimension.
virtual std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &descriptor, const WorkloadInfo &info) const
void CopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
uint32_t m_PadRight
Padding right value in the width dimension.
void PermuteTensorNhwcToNchw(armnn::TensorInfo &tensorInfo, std::vector< T > &tensorData)