ArmNN 21.02
Conv2dTestImpl.cpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "Conv2dTestImpl.hpp"
7 
8 #include <QuantizeHelper.hpp>
9 #include <armnnUtils/TensorUtils.hpp>
10 
11 #include <armnn/utility/IgnoreUnused.hpp>
12 #include <armnn/utility/NumericCast.hpp>
13 #include <armnnUtils/DataLayoutIndexed.hpp>
14 #include <armnnUtils/Permute.hpp>
15 
16 #include <backendsCommon/CpuTensorHandle.hpp>
17 
18 #include <backendsCommon/test/DataLayoutUtils.hpp>
19 #include <backendsCommon/test/TensorCopyUtils.hpp>
20 #include <backendsCommon/test/WorkloadTestUtils.hpp>
21 
22 #include <test/TensorHelpers.hpp>
23 
24 #include <string>
25 
26 //
27 // Static data
28 //
29 
30 // 2-channel bias used by a number of Conv2d tests.
31 static std::vector<float> Bias2({0, 2});
32 
33 static std::vector<float> Bias4({1, 2, 3, 4});
34 
35 static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});
36 
37 // 3-channel 16x8 image used as common input data for a number of Conv2d tests.
38 static std::vector<float> ConvInput3x8x16({
39  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
40  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
41  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
42  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
43  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
44  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
45  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
46  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
47  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
56  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
57  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
58  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
63 });
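// (Interpreted as CHW: channel 0 is 0.5 everywhere apart from one row of zeros, channel 1 is all
// zeros apart from a vertical line of 1s, and channel 2 is -1 everywhere.)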
64 
65 using namespace armnnUtils;
66 
67 //
68 // Helper templates
69 //
70 
71 // Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
72 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
73 boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
74 {
75  if(biasEnabled)
76  {
77  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
78  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias2, qScale, 0));
79  return bias;
80  }
81  else
82  {
83  return boost::multi_array<T, 1>();
84  }
85 }
86 
87 // Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
88 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
89 boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
90 {
91  if(biasEnabled)
92  {
93  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
94  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias4, qScale, 0));
95  return bias;
96  }
97  else
98  {
99  return boost::multi_array<T, 1>();
100  }
101 }
102 
103 // Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
104 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
105 boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
106 {
107  if(biasEnabled)
108  {
109  armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
110  boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(Bias8, qScale, 0));
111  return bias;
112  }
113  else
114  {
115  return boost::multi_array<T, 1>();
116  }
117 }
118 
119 // Helper template that returns either Bias2, Bias4 or Bias8 (depending on the number of output
120 // channels) or an empty vector, depending on whether bias is enabled.
120 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
121 boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
122 {
123  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
124  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
125  const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];
126 
127  switch (outputChannels)
128  {
129  case 2:
130  default:
131  {
132  return GetBias2<ArmnnType>(biasEnabled, qScale);
133  }
134  case 4:
135  {
136  return GetBias4<ArmnnType>(biasEnabled, qScale);
137  }
138  case 8:
139  {
140  return GetBias8<ArmnnType>(biasEnabled, qScale);
141  }
142  }
143 }
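// Note: the callers below pass qScale * qScale as the bias scale, matching the usual convention
// that a bias is quantized with scale = input scale * weight scale and offset 0.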
144 
145 //
146 // Implementation templates
147 //
148 
149 // Mapping from input type to bias type for fully connected layers.
150 // float => float, uint8_t => int32_t
151 template<typename T>
152 struct FullyConnectedBiasTypeForInputType;
153 
154 template<>
155 struct FullyConnectedBiasTypeForInputType<float>
156 {
157  using Type = float;
158 };
159 
160 template<>
161 struct FullyConnectedBiasTypeForInputType<uint8_t>
162 {
163  using Type = int32_t;
164 };
165 
166 // Modifies a std::vector in-place using a specified bias.
167 template<typename T, typename B>
168 void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
169  const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
170 {
171  ARMNN_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
172  "Invalid type and parameter combination.");
173  ARMNN_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
174  "Invalid type and parameter combination.");
175 
176  // Note we need to dequantize and re-quantize the image value and the bias.
177  for (uint32_t i = 0; i < bias.size(); ++i)
178  {
179  float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
180  for (uint32_t y = 0; y < h; ++y)
181  {
182  for (uint32_t x = 0; x < w; ++x)
183  {
184  uint32_t offset = (i * h + y) * w + x;
185  ARMNN_ASSERT(offset < v.size());
186  T& outRef = v[offset];
187  float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
188  outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
189  }
190  }
191  }
192 }
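// For example, with vScale = 0.5, vOffset = 0, bScale = 0.25 and bOffset = 0: an output element
// of 4 dequantizes to 2.0, a bias element of 8 dequantizes to 2.0, and their sum 4.0 re-quantizes
// to 4.0 / 0.5 = 8.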
193 
194 //
195 // Convolution2d implementations
196 //
197 
198 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
199          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
200 LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
201  armnn::IWorkloadFactory& workloadFactory,
202  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
203  const armnn::ITensorHandleFactory& tensorHandleFactory,
204  const boost::multi_array<T, 4>& originalInput,
205  const boost::multi_array<T, 4>& originalKernel,
206  const boost::multi_array<B, 1>& bias,
207  const boost::multi_array<T, 4>& originalOutputExpected,
208  float qScale,
209  int32_t qOffset,
210  const armnn::DataLayout layout = armnn::DataLayout::NCHW,
211  uint32_t padLeft = 0,
212  uint32_t padTop = 0,
213  uint32_t padRight = 0,
214  uint32_t padBottom = 0,
215  uint32_t strideX = 1,
216  uint32_t strideY = 1,
217  uint32_t dilationX = 1,
218  uint32_t dilationY = 1)
219 {
220  armnn::IgnoreUnused(memoryManager);
221  unsigned int inputHeight = armnn::numeric_cast<unsigned int>(originalInput.shape()[2]);
222  unsigned int inputWidth = armnn::numeric_cast<unsigned int>(originalInput.shape()[3]);
223  unsigned int inputChannels = armnn::numeric_cast<unsigned int>(originalInput.shape()[1]);
224  unsigned int inputNum = armnn::numeric_cast<unsigned int>(originalInput.shape()[0]);
225 
226  unsigned int outputHeight = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
227  unsigned int outputWidth = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
228  unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
229  unsigned int outputNum = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
230 
231  unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(originalKernel.shape()[2]);
232  unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(originalKernel.shape()[3]);
233  unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernel.shape()[1]);
234  unsigned int kernelDepthMul = armnn::numeric_cast<unsigned int>(originalKernel.shape()[0]);
235 
236  bool biasEnabled = bias.size() > 0;
237 
238  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
239  ARMNN_ASSERT(inputNum == 1);
240  ARMNN_ASSERT(outputNum == 1);
241 
242  // If a bias is used, its size must equal the number of output channels.
243  ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
244 
245 
246  // Note these tensors will use two (identical) batches.
247  armnn::TensorInfo inputTensorInfo =
248  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
249  armnn::TensorInfo outputTensorInfo =
250  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
251  armnn::TensorInfo kernelDesc =
252  armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
253  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
254 
255  // Set quantization parameters if the requested type is a quantized type.
256  if(armnn::IsQuantizedType<T>())
257  {
258  inputTensorInfo.SetQuantizationScale(qScale);
259  inputTensorInfo.SetQuantizationOffset(qOffset);
260  outputTensorInfo.SetQuantizationScale(qScale);
261  outputTensorInfo.SetQuantizationOffset(qOffset);
262  kernelDesc.SetQuantizationScale(qScale);
263  kernelDesc.SetQuantizationOffset(qOffset);
264  biasDesc.SetQuantizationScale(qScale*qScale);
265  biasDesc.SetQuantizationOffset(0);
266  }
267 
268  LayerTestResult<T, 4> ret(outputTensorInfo);
269 
270  // Construct input data - two batches of the same input image.
271  std::vector<T> inputImage;
272  inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
273  std::vector<T> inputData;
274  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
275  inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
276 
277  // At this point, permute the input data if required.
278  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
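// (Each element of an armnn::PermutationVector is the destination dimension of the corresponding
// source dimension, so { 0, 3, 1, 2 } maps N->0, C->3, H->1, W->2, i.e. NCHW to NHWC.)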
279  if (layout == armnn::DataLayout::NHWC)
280  {
281  std::vector<T> tmp(inputData.size());
282  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
283  inputData = tmp;
284  }
285 
286  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
287 
288  std::vector<T> outputImage;
289  outputImage.assign(originalOutputExpected.data(),
290  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
291 
292  // Apply bias to output image if it is enabled.
293  if(biasEnabled)
294  {
295  std::vector<T> biasV;
296  biasV.assign(bias.data(), bias.data() + outputChannels);
297  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
298  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
299  outputWidth, outputHeight);
300  }
301 
302  // Construct expected output data - two identical images.
303  std::vector<T> outputData;
304  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
305  outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
306 
307  // At this point, permute the expected output if required.
308  if (layout == armnn::DataLayout::NHWC)
309  {
310  std::vector<T> tmp(outputData.size());
311  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
312  outputData = tmp;
313  }
314  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
315 
316  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
317  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
318 
319  armnn::Convolution2dQueueDescriptor data;
320  armnn::WorkloadInfo info;
321  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
322  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
323  // Permute the kernel if necessary
324  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
325  if (layout == armnn::DataLayout::NHWC)
326  {
327  armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
328  }
329  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
330 
331  if(biasEnabled)
332  {
333  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
334  }
335 
336  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
337  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
338 
339  data.m_Weight = &weightsTensor;
340  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
341  data.m_Parameters.m_StrideX = strideX;
342  data.m_Parameters.m_StrideY = strideY;
343  data.m_Parameters.m_PadLeft = padLeft;
344  data.m_Parameters.m_PadRight = padRight;
345  data.m_Parameters.m_PadTop = padTop;
346  data.m_Parameters.m_PadBottom = padBottom;
347  data.m_Parameters.m_BiasEnabled = biasEnabled;
348  data.m_Parameters.m_DataLayout = layout;
349  data.m_Parameters.m_DilationX = dilationX;
350  data.m_Parameters.m_DilationY = dilationY;
351 
352  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
353  inputHandle->Allocate();
354  outputHandle->Allocate();
355 
356  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
357 
358  ExecuteWorkload(*workload, memoryManager);
359 
360  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
361 
362  return ret;
363 }
364 
365 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
366          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>,
367  armnn::DataType OutType = ArmnnType, typename O = armnn::ResolveType<OutType>>
368 LayerTestResult<O, 4> SimpleConvolution2dNhwcTestImpl(
369  armnn::IWorkloadFactory& workloadFactory,
370  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
371  const armnn::ITensorHandleFactory& tensorHandleFactory,
372  const boost::multi_array<T, 4>& input,
373  const boost::multi_array<T, 4>& kernel,
374  const boost::multi_array<B, 1>& bias,
375  const boost::multi_array<O, 4>& outputExpected,
376  const armnn::DataLayout dataLayout,
377  float qScale,
378  int32_t qOffset,
379  uint32_t padLeft = 1,
380  uint32_t padTop = 1,
381  uint32_t padRight = 1,
382  uint32_t padBottom = 1,
383  uint32_t strideX = 1,
384  uint32_t strideY = 1)
385 {
386  armnn::IgnoreUnused(qScale, qOffset);
387  unsigned int inputNum = armnn::numeric_cast<unsigned int>(input.shape()[0]);
388  unsigned int inputChannels = armnn::numeric_cast<unsigned int>(input.shape()[3]);
389  unsigned int inputHeight = armnn::numeric_cast<unsigned int>(input.shape()[1]);
390  unsigned int inputWidth = armnn::numeric_cast<unsigned int>(input.shape()[2]);
391 
392  unsigned int kernelChanMul = armnn::numeric_cast<unsigned int>(kernel.shape()[0]);
393  unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernel.shape()[3]);
394  unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(kernel.shape()[1]);
395  unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(kernel.shape()[2]);
396 
397  unsigned int outputNum = armnn::numeric_cast<unsigned int>(outputExpected.shape()[0]);
398  unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpected.shape()[3]);
399  unsigned int outputHeight = armnn::numeric_cast<unsigned int>(outputExpected.shape()[1]);
400  unsigned int outputWidth = armnn::numeric_cast<unsigned int>(outputExpected.shape()[2]);
401 
402  bool biasEnabled = bias.size() > 0;
403 
404  // Creates the tensors.
405  armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
406  armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
407  OutType);
408  armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
409  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
410 
411  // Construct the input data.
412  std::vector<T> inputData;
413  inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
414  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
415 
416  // Construct the output data, with bias applied, as appropriate.
417  std::vector<O> outputData;
418  outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
419 
420  LayerTestResult<O, 4> ret(outputTensorInfo);
421  ret.outputExpected = MakeTensor<O, 4>(outputTensorInfo, outputData);
422 
423  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
424  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
425 
426  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
427  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
428 
429  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
430 
431  armnn::Convolution2dQueueDescriptor data;
432 
433  data.m_Weight = &weightsTensor;
434  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
435  data.m_Parameters.m_StrideX = strideX;
436  data.m_Parameters.m_StrideY = strideY;
437  data.m_Parameters.m_PadLeft = padLeft;
438  data.m_Parameters.m_PadRight = padRight;
439  data.m_Parameters.m_PadTop = padTop;
440  data.m_Parameters.m_PadBottom = padBottom;
441  data.m_Parameters.m_BiasEnabled = biasEnabled;
442  data.m_Parameters.m_DataLayout = dataLayout;
443 
444  armnn::WorkloadInfo info;
445  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
446  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
447 
448  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
449  inputHandle->Allocate();
450  outputHandle->Allocate();
451 
452  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
453 
454  ExecuteWorkload(*workload, memoryManager);
455 
456  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
457 
458  return ret;
459 }
460 
461 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
462 LayerTestResult<T, 4> Convolution1dTestImpl(
463  armnn::IWorkloadFactory& workloadFactory,
464  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
465  const armnn::ITensorHandleFactory& tensorHandleFactory,
466  float qScale,
467  int32_t qOffset,
468  bool biasEnabled)
469 {
470  using B = armnn::ResolveType<ArmnnBType>;
471  // Until we have a specialist 1D convolution layer, we can fake one using
472  // 2D convolution with the final dimension set to 1.
473  // I don't anticipate this being particularly slow, given that convolution is implemented
474  // as a matrix multiplication, at which point dimension doesn't matter.
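// Concretely, the input below has shape [N, C, inputSize, 1] and the kernel
// [outChannels, inChannels, kernelSize, 1], with all of the padding and striding applied
// along the Y (height) axis and none along X.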
475 
476  unsigned int batchSize = 1;
477  unsigned int inputChannels = 2;
478  unsigned int outputChannels = 3;
479  unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
480  unsigned int kernelSize = 3;
481  unsigned int padSize = 2;
482  unsigned int stride = 1;
483  unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize) / stride + 1 => (5 + 4 - 3) / 1 + 1.
484 
485  armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
486  armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
487  armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
488  armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);
489 
490  // Set quantization parameters if the requested type is a quantized type.
491  if(armnn::IsQuantizedType<T>())
492  {
493  inputInfo.SetQuantizationScale(qScale);
494  inputInfo.SetQuantizationOffset(qOffset);
495  outputInfo.SetQuantizationScale(qScale);
496  outputInfo.SetQuantizationOffset(qOffset);
497  kernelInfo.SetQuantizationScale(qScale);
498  kernelInfo.SetQuantizationOffset(qOffset);
499  biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
500  biasInfo.SetQuantizationOffset(0);
501  }
502 
503  std::vector<T> inputData = QuantizedVector<T>(
504  {
505  5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
506  -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
507  },
508  inputInfo.GetQuantizationScale(),
509  inputInfo.GetQuantizationOffset());
510 
511  std::vector<T> kernelData = QuantizedVector<T>(
512  {
513  1.0f, 0.0f, 0.0f,
514  0.0f, 2.0f, -1.5f,
515 
516  0.0f, 0.0f, 0.0f,
517  0.2f, 0.2f, 0.2f,
518 
519  0.5f, 0.0f, 0.5f,
520  0.0f, -1.0f, 0.0f
521  },
522  kernelInfo.GetQuantizationScale(),
523  kernelInfo.GetQuantizationOffset());
524 
525  std::vector<B> biasData =
526  QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());
527 
528  std::vector<T> outputData = QuantizedVector<T>(
529  {
530  4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
531  -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
532  2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
533  },
534  outputInfo.GetQuantizationScale(),
535  outputInfo.GetQuantizationOffset());
536 
537  // Optionally apply bias to output image.
538  if(biasEnabled)
539  {
540  ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
541  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
542  1, outputSize);
543  }
544 
545  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
546  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
547 
548  armnn::Convolution2dQueueDescriptor data;
549  armnn::WorkloadInfo info;
550  armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo);
551  armnn::ScopedCpuTensorHandle biasTensor(biasInfo);
552 
553  AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
554  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
555 
556  AddInputToWorkload(data, info, inputInfo, inputHandle.get());
557  AddOutputToWorkload(data, info, outputInfo, outputHandle.get());
558 
559  data.m_Weight = &weightsTensor;
560  data.m_Bias = &biasTensor;
561  data.m_Parameters.m_StrideX = 1;
562  data.m_Parameters.m_StrideY = stride;
563  data.m_Parameters.m_PadLeft = 0;
564  data.m_Parameters.m_PadRight = 0;
565  data.m_Parameters.m_PadTop = padSize;
566  data.m_Parameters.m_PadBottom = padSize;
567  data.m_Parameters.m_BiasEnabled = biasEnabled;
568 
569  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
570  inputHandle->Allocate();
571  outputHandle->Allocate();
572 
573  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
574 
575  ExecuteWorkload(*workload, memoryManager);
576 
577  // Output
578  LayerTestResult<T,4> ret(outputInfo);
579  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
580  ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData);
581  return ret;
582 }
583 
584 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
585 LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
586  armnn::IWorkloadFactory& workloadFactory,
587  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
588  const armnn::ITensorHandleFactory& tensorHandleFactory,
589  float qScale,
590  int32_t qOffset,
591  bool biasEnabled,
592  armnn::DataLayout dataLayout)
593 {
594  armnn::IgnoreUnused(biasEnabled);
595  // Use a single-batch 1-channel input of height 3 and width 4 (NHWC).
596 
597  armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
598  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
599  {
600  1, 5, 2, 3,
601  8, 7, 3, 6,
602  3, 3, 9, 1
603  });
604 
605 
606  // Use a single 1-channel 3x3 kernel.
607  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
608  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
609  4, 5, 6,
610  0, 0, 0,
611  3, 2, 1
612  });
613 
614  // Expected output is 1 batch of a 1-channel image of height 3 and width 4.
615  armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);
616 
617  const std::vector<float> outputData =
618  {
619  23, 41, 33, 21,
620  44, 65, 76, 52,
621  82, 85, 79, 42
622  };
623 
624  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
625 
626  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
627  workloadFactory,
628  memoryManager,
629  tensorHandleFactory,
630  input,
631  kernel,
632  boost::multi_array<T, 1>(),
633  expectedOutput,
634  dataLayout,
635  qScale,
636  qOffset);
637 }
638 
639 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
640 LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
641  armnn::IWorkloadFactory& workloadFactory,
642  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
643  const armnn::ITensorHandleFactory& tensorHandleFactory,
644  float qScale,
645  int32_t qOffset,
646  bool biasEnabled,
647  const armnn::DataLayout& dataLayout)
648 {
649  armnn::IgnoreUnused(biasEnabled);
650 
651  // Input is a single-batch, 1 channel, 5x5 image.
652  armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
653  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
654  {
655  1, 5, 2, 3, 5,
656  8, 7, 3, 6, 3,
657  3, 3, 9, 1, 9,
658  4, 1, 8, 1, 3,
659  6, 8, 1, 9, 2
660  });
661 
662  // Use a 3x3 kernel.
663  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
664  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
665  {
666  4, 5, 6,
667  0, 0, 0,
668  3, 2, 1
669  });
670 
671  // Expected output is a single-batch, 1 channel, 3x3 image.
672  armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);
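// With a 5x5 input, 3x3 kernel, 1-pixel padding and 2x2 strides (set below):
// out = (5 + 1 + 1 - 3) / 2 + 1 = 3 in each spatial dimension.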
673 
674  const std::vector<T> outputData =
675  {
676  23, 33, 24,
677  91, 99, 48,
678  26, 50, 19
679  };
680 
681  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);
682 
683  uint32_t padLeft = 1;
684  uint32_t padTop = 1;
685  uint32_t padRight = 1;
686  uint32_t padBottom = 1;
687  uint32_t strideX = 2;
688  uint32_t strideY = 2;
689 
690  return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
691  workloadFactory,
692  memoryManager,
693  tensorHandleFactory,
694  input,
695  kernel,
696  boost::multi_array<T, 1>(),
697  expectedOutput,
698  dataLayout,
699  qScale,
700  qOffset,
701  padLeft,
702  padTop,
703  padRight,
704  padBottom,
705  strideX,
706  strideY);
707 }
708 
709 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
710 LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
711  armnn::IWorkloadFactory& workloadFactory,
712  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
713  const armnn::ITensorHandleFactory& tensorHandleFactory,
714  float qScale,
715  int32_t qOffset,
716  bool biasEnabled,
717  const armnn::DataLayout layout)
718 {
719  // Use common single-batch 3-channel 16x8 image.
720  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
721  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
722 
723  // Use a 2-element batch with 3-channel 3x5 kernels.
724  armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
725  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
726  QuantizedVector<T>({
727  1, 1, 1,
728  1, -1, 1,
729  1, 1, 1,
730  1, 1, 1,
731  1, 1, 1,
732 
733  0, 0, 0,
734  0, 0, 0,
735  0, 0, 0,
736  0, 0, 0,
737  0, 0, 0,
738 
739  2, 2, 2,
740  2, 2, 2,
741  2, 2, 2,
742  2, 2, 2,
743  2, 2, 2,
744 
745 
746  0, 0, 0,
747  0, 0, 0,
748  0, 0, 0,
749  0, 0, 0,
750  0, 0, 0,
751 
752  1, 1, 1,
753  1, 1, 1,
754  1, 1, 1,
755  1, 1, 1,
756  1, 1, 1,
757 
758  0, 0, 0,
759  0, 0, 0,
760  0, 0, 0,
761  0, 0, 0,
762  0, 0, 0
763  },
764  qScale, qOffset)));
765 
766  // Expected output is 1 batch of a 2-channel 14x4 image (the test impl duplicates it into 2 batches).
767  armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
768  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
769  QuantizedVector<T>({
770  -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
771  -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
772  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
773  -23.5f, -23.5f, -23.5f,
774  -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
775  -23.5f, -23.5f, -23.5f,
776 
777  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
778  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
779  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
780  5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
781  },
782  qScale, qOffset)));
783 
784  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
785  workloadFactory,
786  memoryManager,
787  tensorHandleFactory,
788  input,
789  kernel,
790  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
791  expectedOutput,
792  qScale,
793  qOffset,
794  layout);
795 }
796 
797 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
798  typename T = armnn::ResolveType<ArmnnType>>
799 LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
800  armnn::IWorkloadFactory& workloadFactory,
801  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
802  const armnn::ITensorHandleFactory& tensorHandleFactory,
803  float qScale,
804  int32_t qOffset,
805  bool biasEnabled,
806  const armnn::DataLayout layout)
807 {
808  // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.
809 
810  // Use common single-batch 3-channel 16x8 image.
811  armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
812  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset));
813 
814  // Use a 2-element batch of 3-channel 3x3 kernels.
815  armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
816  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
817  QuantizedVector<T>({
818  1, 1, 1,
819  1, -1, 1,
820  1, 1, 1,
821 
822  0, 0, 0,
823  0, 0, 0,
824  0, 0, 0,
825 
826  2, 2, 2,
827  2, 2, 2,
828  2, 2, 2,
829 
830 
831  0, 0, 0,
832  0, 0, 0,
833  0, 0, 0,
834 
835  1, 1, 1,
836  1, 1, 1,
837  1, 1, 1,
838 
839  0, 0, 0,
840  0, 0, 0,
841  0, 0, 0
842  },
843  qScale, qOffset)));
844 
845  // Expected output is 1 batch of a 2-channel 14x6 image.
846  armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
847  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
848  QuantizedVector<T>({
849  -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
850  -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
851  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
852  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
853  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
854  -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
855 
856  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
857  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
858  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
859  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
860  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
861  3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
862  },
863  qScale, qOffset)));
864 
865  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
866  workloadFactory,
867  memoryManager,
868  tensorHandleFactory,
869  input,
870  kernel,
871  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
872  expectedOutput,
873  qScale,
874  qOffset,
875  layout);
876 }
877 
878 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
879  typename T = armnn::ResolveType<ArmnnType>>
880 LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
881  armnn::IWorkloadFactory& workloadFactory,
882  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
883  const armnn::ITensorHandleFactory& tensorHandleFactory,
884  const armnn::DataLayout layout,
885  float qScale,
886  int32_t qOffset)
887 {
888  // Use a single-batch 1-channel 3x3 image as input.
889  armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
890  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
891  QuantizedVector<T>({
892  11,21,31,
893  12,22,32,
894  13,23,33
895  },
896  qScale, qOffset)));
897 
898  // Use 1 batch of a 1-channel 2x2 kernel.
899  armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
900  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
901  QuantizedVector<T>({
902  -11,-21,
903  -12,-22,
904  },
905  qScale, qOffset)));
906 
907 // Expected output is 1 batch of a 1-channel 6x8 image.
908 // Manually calculated like this:
909 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
910 //[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
911 //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
912 //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
913 //[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
914 //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
915 //[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
916  armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
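// Output height = (3 + padTop + padBottom - 2) + 1 = 3 + 2 + 4 - 2 + 1 = 8 and
// output width = (3 + padLeft + padRight - 2) + 1 = 3 + 1 + 3 - 2 + 1 = 6, hence {1, 1, 8, 6}.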
917  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
918  QuantizedVector<T>({
919  0, 0, 0, 0, 0, 0,
920  -242, -594, -934, -372, 0, 0,
921  -495, -1190, -1850, -725, 0, 0,
922  -538, -1256, -1916, -748, 0, 0,
923  -273, -626, -946, -363, 0, 0,
924  0, 0, 0, 0, 0, 0,
925  0, 0, 0, 0, 0, 0,
926  0, 0, 0, 0, 0, 0
927  },
928  qScale, qOffset)));
929 
930  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
931  workloadFactory,
932  memoryManager,
933  tensorHandleFactory,
934  input,
935  kernel,
936  GetBias2<ArmnnBType>(false, qScale * qScale),
937  expectedOutput,
938  qScale,
939  qOffset,
940  layout,
941  1, // Padding left.
942  2, // Padding top.
943  3, // Padding right.
944  4); // Padding bottom.
945 }
946 
947 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
948  typename T = armnn::ResolveType<ArmnnType>>
949 LayerTestResult<T, 4> Convolution2dAsymmetricPaddingTestCommon(
950  armnn::IWorkloadFactory& workloadFactory,
951  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
952  const armnn::ITensorHandleFactory& tensorHandleFactory,
953  const armnn::DataLayout layout,
954  float qScale,
955  int32_t qOffset)
956 {
957  // Use a single-batch 1-channel 5x5 image as input.
958  armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
959  boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
960  QuantizedVector<T>({
961  11,21,31,41,51,
962  12,22,32,42,52,
963  13,23,33,43,53,
964  14,24,34,44,54,
965  15,25,35,45,55,
966  }, qScale, qOffset)));
967 
968  // Use 1 batch of a 1-channel 4x4 kernel.
969  armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
970  boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
971  QuantizedVector<T>({
972  -11,-21,-31,-41,
973  -12,-22,-32,-42,
974  -13,-23,-33,-43,
975  -14,-24,-34,-44,
976  },
977  qScale, qOffset)));
978 
979  // Expected output is 1 batch of a 1-channel 5x5 image.
980  armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
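// With the 1/2 asymmetric padding used below: out = (5 + 1 + 2 - 4) + 1 = 5 in each spatial
// dimension, so the output is the same size as the input.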
981  std::vector<T> myVec(outputDesc.GetNumElements(), 0);
982  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
983  QuantizedVector<T>({
984  -7140, -10580, -13940, -9300, -5230,
985  -9590, -14120, -18520, -12290, -6860,
986  -9980, -14560, -18960, -12560, -7000,
987  -7518, -10904, -14144, -9318, -5152,
988  -5032, -7256, -9376, -6142, -3368,
989  },
990  qScale, qOffset)));
991 
992  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
993  workloadFactory,
994  memoryManager,
995  tensorHandleFactory,
996  input,
997  kernel,
998  GetBias2<ArmnnBType>(false, qScale * qScale),
999  expectedOutput,
1000  qScale,
1001  qOffset,
1002  layout,
1003  1, // Padding left.
1004  1, // Padding top.
1005  2, // Padding right.
1006  2); // Padding bottom.
1007 }
1008 
1009 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1010 LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
1011  armnn::IWorkloadFactory& workloadFactory,
1012  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1013  const armnn::ITensorHandleFactory& tensorHandleFactory,
1014  const std::vector<float>& inputNoQuantizedValues,
1015  armnn::TensorInfo& inputTensorInfo,
1016  const std::vector<float>& kernelNoQuantizedValues,
1017  armnn::TensorInfo& kernelTensorInfo,
1018  const std::vector<float>& outputExpectedNoQuantizedValues,
1019  armnn::TensorInfo& outputTensorInfo,
1020  uint32_t dilationX,
1021  uint32_t dilationY,
1022  const armnn::DataLayout layout = armnn::DataLayout::NCHW,
1023  uint32_t padLeft = 0,
1024  uint32_t padTop = 0,
1025  uint32_t padRight = 0,
1026  uint32_t padBottom = 0,
1027  uint32_t strideX = 1,
1028  uint32_t strideY = 1,
1029  bool biasEnabled = false
1030 )
1031 {
1032  float qScale;
1033  int32_t qOffset;
1034  switch (ArmnnType)
1035  {
1036  case armnn::DataType::QAsymmS8:
1037  case armnn::DataType::QAsymmU8:
1038  {
1039  qScale = 0.1f;
1040  qOffset = 128;
1041  break;
1042  }
1043  case armnn::DataType::QSymmS16:
1044  {
1045  qScale = 0.1f;
1046  qOffset = 0;
1047  break;
1048  }
1049  case armnn::DataType::Float32:
1050  default:
1051  {
1052  qScale = 0.f;
1053  qOffset = 0;
1054  break;
1055  }
1056  }
1057 
1058  inputTensorInfo.SetQuantizationScale(qScale);
1059  inputTensorInfo.SetQuantizationOffset(qOffset);
1060  kernelTensorInfo.SetQuantizationScale(qScale);
1061  kernelTensorInfo.SetQuantizationOffset(qOffset);
1062  outputTensorInfo.SetQuantizationScale(qScale);
1063  outputTensorInfo.SetQuantizationOffset(qOffset);
1064 
1065  auto input = MakeTensor<T, 4>(inputTensorInfo,
1066  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
1067  inputTensorInfo.GetQuantizationScale(),
1068  inputTensorInfo.GetQuantizationOffset())));
1069  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
1070  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
1071  kernelTensorInfo.GetQuantizationScale(),
1072  kernelTensorInfo.GetQuantizationOffset())));
1073  auto expectedOutput =
1074  MakeTensor<T, 4>(outputTensorInfo,
1075  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
1076  outputTensorInfo.GetQuantizationScale(),
1077  outputTensorInfo.GetQuantizationOffset())));
1078 
1079  return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
1080  workloadFactory,
1081  memoryManager,
1082  tensorHandleFactory,
1083  input,
1084  kernel,
1085  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
1086  expectedOutput,
1087  qScale,
1088  qOffset,
1089  layout,
1090  padLeft,
1091  padTop,
1092  padRight,
1093  padBottom,
1094  strideX,
1095  strideY,
1096  dilationX,
1097  dilationY);
1098 }
1099 
1100 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1101 LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
1102  armnn::IWorkloadFactory& workloadFactory,
1103  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1104  const armnn::ITensorHandleFactory& tensorHandleFactory,
1105  bool biasEnabled,
1106  const armnn::DataLayout layout)
1107 {
1108  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1109  std::vector<float> inputNoQuantizedValues =
1110  {
1111  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1112  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1113  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1114  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1115  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1116  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1117  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1118  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1119  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1120  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1121  };
1122 
1123  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
1124  std::vector<float> kernelNoQuantizedValues =
1125  {
1126  1, 2, 3,
1127  4, 5, 6,
1128  7, 8, 9
1129  };
1130 
1131  // Since the dilation rate is 3 this dilates the kernel to an effective 7x7: d*(K-1)+1 => 3*(3-1)+1 = 7,
1132  // therefore the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4
1133  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1134  std::vector<float> outputExpectedNoQuantizedValues =
1135  {
1136  6., 5., 5., 5.,
1137  6., 5., 5., 5.,
1138  6., 5., 5., 5.,
1139  3., 2., 2., 2.
1140  };
1141 
1142  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1143  workloadFactory,
1144  memoryManager,
1145  tensorHandleFactory,
1146  inputNoQuantizedValues,
1147  inputTensorInfo,
1148  kernelNoQuantizedValues,
1149  kernelTensorInfo,
1150  outputExpectedNoQuantizedValues,
1151  outputTensorInfo,
1152  3,
1153  3,
1154  layout,
1155  biasEnabled);
1156 }
1157 
1158 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1159 LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
1160  armnn::IWorkloadFactory& workloadFactory,
1161  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1162  const armnn::ITensorHandleFactory& tensorHandleFactory,
1163  bool biasEnabled,
1164  const armnn::DataLayout layout)
1165 {
1166  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
1167  std::vector<float> inputNoQuantizedValues =
1168  {
1169  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1170  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1171  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1172  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1173  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1174  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1175  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1176  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1177  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1178  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1179 
1180  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1181  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1182  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1183  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1184  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1185  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1186  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1187  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1188  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1189  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1190  };
1191 
1192  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
1193  std::vector<float> kernelNoQuantizedValues =
1194  {
1195  1, 2, 3,
1196  4, 5, 6,
1197  7, 8, 9,
1198 
1199  1, 2, 3,
1200  4, 5, 6,
1201  7, 8, 9
1202  };
1203 
1204  // Since the dilation rate is 3 this dilates the kernel to an effective 7x7: d*(K-1)+1 => 3*(3-1)+1 = 7,
1205  // therefore the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4
1206  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1207  std::vector<float> outputExpectedNoQuantizedValues =
1208  {
1209  12., 10., 10., 10.,
1210  12., 10., 10., 10.,
1211  12., 10., 10., 10.,
1212  6., 4., 4., 4.
1213  };
1214 
1215  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1216  workloadFactory,
1217  memoryManager,
1218  tensorHandleFactory,
1219  inputNoQuantizedValues,
1220  inputTensorInfo,
1221  kernelNoQuantizedValues,
1222  kernelTensorInfo,
1223  outputExpectedNoQuantizedValues,
1224  outputTensorInfo,
1225  3,
1226  3,
1227  layout,
1228  biasEnabled);
1229 }
1230 
1231 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1232 LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
1233  armnn::IWorkloadFactory &workloadFactory,
1234  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1235  const armnn::ITensorHandleFactory& tensorHandleFactory,
1236  bool biasEnabled,
1237  const armnn::DataLayout layout)
1238 {
1239  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
1240  std::vector<float> inputNoQuantizedValues =
1241  {
1242  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1243  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1244  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1245  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1246  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1247  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1248  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1249  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1250  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1251  1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1252  };
1253 
1254  armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
1255  std::vector<float> kernelNoQuantizedValues =
1256  {
1257  1, 2,
1258  3, 4
1259  };
1260 
1261  // Since the dilation rate is 2 this dilates the kernel to an effective 3x3: d*(K-1)+1 => 2*(2-1)+1 = 3,
1262  // therefore the output will be 4x4: trunc((I - K + 2P)/S) + 1 => trunc((10 - 3 + 2*2)/3) + 1 = 3 + 1 = 4
1263  // where dilation = d = 2; kernel size = K = 2; input size = I = 10; padding = P = 2; stride = S = 3
1264  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1265  std::vector<float> outputExpectedNoQuantizedValues =
1266  {
1267  4, 7, 7, 3,
1268  6, 10, 10, 4,
1269  6, 10, 10, 4,
1270  2, 3, 3, 1
1271  };
1272  uint32_t padLeft = 1;
1273  uint32_t padTop = 1;
1274  uint32_t padRight = 1;
1275  uint32_t padBottom = 1;
1276 
1277  return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1278  workloadFactory,
1279  memoryManager,
1280  tensorHandleFactory,
1281  inputNoQuantizedValues,
1282  inputTensorInfo,
1283  kernelNoQuantizedValues,
1284  kernelTensorInfo,
1285  outputExpectedNoQuantizedValues,
1286  outputTensorInfo,
1287  2,
1288  2,
1289  layout,
1290  padLeft,
1291  padTop,
1292  padRight,
1293  padBottom,
1294  3,
1295  3,
1296  biasEnabled
1297  );
1298 }
1299 
1300 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
1301 LayerTestResult<T, 4> CompareConvolution2dTestImpl(
1302  armnn::IWorkloadFactory& workloadFactory,
1303  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1304  armnn::IWorkloadFactory& refWorkloadFactory,
1305  const armnn::ITensorHandleFactory& tensorHandleFactory,
1306  const armnn::ITensorHandleFactory& refTensorHandleFactory)
1307 {
1308  unsigned int inputHeight = 8;
1309  unsigned int inputWidth = 16;
1310  unsigned int inputChannels = 3;
1311  unsigned int inputNum = 5;
1312 
1313  unsigned int kernelHeight = 3;
1314  unsigned int kernelWidth = 3;
1315 
1316  unsigned int strideX = 2;
1317  unsigned int strideY = 3;
1318  unsigned int padX = 1;
1319  unsigned int padY = 1;
1320 
1321  unsigned int outputNum = inputNum;
1322  unsigned int outputChannels = 2;
1323  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
1324  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
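// With these values: outputHeight = (8 + 2 - 3 + 3) / 3 = 3 and outputWidth = (16 + 2 - 3 + 2) / 2 = 8
// (integer division), giving an output shape of {5, 2, 3, 8}.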
1325 
1326  armnn::TensorInfo inputTensorInfo;
1327  armnn::TensorInfo outputTensorInfo;
1328  armnn::TensorInfo kernelDesc;
1329  armnn::TensorInfo biasDesc;
1330 
1331  unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
1332  unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1333  unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1334  unsigned int biasShape[] = {outputChannels};
1335 
1336  inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1337  outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1338  kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1339  biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1340 
1341  LayerTestResult<T,4> ret(outputTensorInfo);
1342 
1343  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908);
1344  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234);
1345  auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028);
1346 
1347  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1348  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1349 
1350  armnn::Convolution2dQueueDescriptor data;
1351  armnn::WorkloadInfo info;
1352  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1353  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1354 
1355  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1356  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1357 
1358  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1359  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1360  data.m_Weight = &weightsTensor;
1361  data.m_Bias = &biasTensor;
1362  data.m_Parameters.m_StrideX = strideX;
1363  data.m_Parameters.m_StrideY = strideY;
1364  data.m_Parameters.m_PadLeft = padX;
1365  data.m_Parameters.m_PadRight = padX;
1366  data.m_Parameters.m_PadTop = padY;
1367  data.m_Parameters.m_PadBottom = padY;
1368  data.m_Parameters.m_BiasEnabled = true;
1369 
1370  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1371  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1372 
1373  armnn::Convolution2dQueueDescriptor refData = data;
1374  armnn::WorkloadInfo refInfo = info;
1375  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1376  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1377 
1378  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
1379  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo);
1380 
1381  outputHandleRef->Allocate();
1382  inputHandleRef->Allocate();
1383 
1384  inputHandle->Allocate();
1385  outputHandle->Allocate();
1386 
1387  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1388  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
1389 
1390  ExecuteWorkload(*workload, memoryManager);
1391 
1392  workloadRef->PostAllocationConfigure();
1393  workloadRef->Execute();
1394 
1395  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1396  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
1397 
1398  return ret;
1399 }
1400 
1401 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16Test(
1402  armnn::IWorkloadFactory& workloadFactory,
1403  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1404  const armnn::ITensorHandleFactory& tensorHandleFactory,
1405  bool biasEnabled,
1406  const armnn::DataLayout& dataLayout)
1407 {
1408  // BFloat16 input and weight, Float32 output
1409  armnn::IgnoreUnused(biasEnabled);
1410 
1411  // Input is a single-batch, 1 channel, 5x5 image.
1412  armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
1413 
1414  std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1415  {
1416  10.0367984f, // 10.0625
1417  2.0380895f, // 2.03125
1418  15.0420157f, // 15.0625
1419  22.0675631f, // 22.125
1420  8.0938920f, // 8.125
1421  5.0476106f, // 5.0625
1422  80.1035490f, // 80
1423  100.1260370f, // 100
1424  55.0461647f, // 55
1425  120.0883828f, // 120
1426  9.1159540f, // 9.125
1427  90.0498519f, // 90
1428  200.0104630f, // 200
1429  30.0154114f, // 30
1430  75.00137681f, // 75
1431  30.0344238f, // 30
1432  25.0356445f, // 25
1433  130.0495605f, // 130
1434  60.0683594f, // 60
1435  35.0991211f, // 35
1436  8.0461426f, // 8.0625
1437  12.0996094f, // 12.125
1438  98.1269530f, // 98
1439  125.0393066f, // 125
1440  5.103516f // 5.0937
1441  },
1442  1.0f, 0);
1443 
1444  auto input = MakeTensor<armnn::BFloat16, 4>(inputDesc, inputValues);
1445 
1446  // Use a 3x3 kernel.
1447  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1448 
1449  std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1450  {
1451  -0.126184f, // -0.125977
1452  -0.150468f, // -0.150391
1453  -0.101412f, // -0.101562
1454  -0.0586369f,// -0.0585938
1455  -0.0865864f,// -0.0864258
1456  -0.0435089f,// -0.043457
1457  0.0347555f, // 0.034668
1458  0.0323111f, // 0.0322266
1459  0.0385381f // 0.0385742
1460  },
1461  1.0f, 0);
1462 
1463  auto kernel = MakeTensor<armnn::BFloat16, 4>(kernelDesc, kernelValues);
1464 
1465  // Expected output is a single-batch, 1 channel, 3x3 image.
1466  armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
1467 
1468  // Expected output (with results if calculated as FP32 in the comments)
1469  const std::vector<float> outputData =
1470  {
1471  2.296875f, // 2.29240716
1472  5.75f, // 5.75851926
1473  3.78125f, // 3.79855026
1474  -11.625f, // -11.65498118
1475  -47.25f, // -47.27316893
1476  -30.0f, // -30.04771684
1477  -8.25f, // -8.28126168
1478  -43.5f, // -43.46531337
1479  -20.625f // -20.63477281
1480  };
1481 
1482  boost::multi_array<float, 4> expectedOutput = MakeTensor<float, 4>(outputDesc, outputData);
1483 
1484  uint32_t padLeft = 1;
1485  uint32_t padTop = 1;
1486  uint32_t padRight = 1;
1487  uint32_t padBottom = 1;
1488  uint32_t strideX = 2;
1489  uint32_t strideY = 2;
1490 
1491  return SimpleConvolution2dNhwcTestImpl
1492  <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
1493  workloadFactory,
1494  memoryManager,
1495  tensorHandleFactory,
1496  input,
1497  kernel,
1498  boost::multi_array<float, 1>(),
1499  expectedOutput,
1500  dataLayout,
1501  1.0f,
1502  0,
1503  padLeft,
1504  padTop,
1505  padRight,
1506  padBottom,
1507  strideX,
1508  strideY);
1509 }
1510 
1511 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16SmallValueTest(
1512  armnn::IWorkloadFactory& workloadFactory,
1513  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1514  const armnn::ITensorHandleFactory& tensorHandleFactory,
1515  bool biasEnabled,
1516  const armnn::DataLayout& dataLayout)
1517 {
1518  // BFloat16 input and weight, Float32 output
1519  armnn::IgnoreUnused(biasEnabled);
1520 
1521  // Input is a single-batch, 1 channel, 5x5 image.
1522  armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
1523 
1524  std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1525  {
1526  0.0367984f, // 0.0368652
1527  0.0380895f, // 0.0380859
1528  0.0420157f, // 0.0419922
1529  0.0675631f, // 0.0673828
1530  0.0938920f, // 0.09375
1531  0.0476106f, // 0.0476074
1532  0.1035490f, // 0.103516
1533  0.1260370f, // 0.125977
1534  0.0461647f, // 0.0461426
1535  0.0883828f, // 0.0883789
1536  0.1159540f, // 0.115723
1537  0.0498519f, // 0.0498047
1538  0.0104630f, // 0.010437
1539  0.0154114f, // 0.0154419
1540  0.00137681f, // 0.00137329
1541  0.0344238f, // 0.0344616
1542  0.0356445f, // 0.0355693
1543  0.0495605f, // 0.0495018
1544  0.0683594f, // 0.0683308
1545  0.0991211f, // 0.0988837
1546  0.0461426f, // 0.0461838
1547  0.0996094f, // 0.0997546
1548  0.1269530f, // 0.127099
1549  0.0393066f, // 0.0392791
1550  0.103516f // 0.103641
1551  },
1552  1.0f, 0);
1553 
1554  auto input = MakeTensor<armnn::BFloat16, 4>(inputDesc, inputValues);
1555 
1556  // Use a 3x3 kernel.
1557  armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1558 
1559  std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1560  {
1561  -0.126184f, // -0.125977
1562  -0.150468f, // -0.150391
1563  -0.101412f, // -0.101562
1564  -0.0586369f,// -0.0585938
1565  -0.0865864f,// -0.0864258
1566  -0.0435089f,// -0.043457
1567  0.0347555f, // 0.034668
1568  0.0323111f, // 0.0322266
1569  0.0385381f // 0.0385742
1570  },
1571  1.0f, 0);
1572 
1573  auto kernel = MakeTensor<armnn::BFloat16, 4>(kernelDesc, kernelValues);
1574 
1575  // Expected output is a single-batch, 1 channel, 3x3 image.
1576  armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
1577 
1578  // Expected output (with results if calculated as FP32 in the comments)
1579  const std::vector<float> outputData =
1580  {
1581  0.000686645508f, // 0.000685
1582  0.000640869141f, // 0.000639
1583  -0.00759887695f, // -0.007631
1584  -0.02734375f, // -0.027388
1585  -0.0356445312f, // -0.035737
1586  -0.0145874023f, // -0.014568
1587  -0.0170898438f, // -0.017124
1588  -0.0373535156f, // -0.037431
1589  -0.0346679688f // -0.034808
1590  };
1591 
1592  boost::multi_array<float, 4> expectedOutput = MakeTensor<float, 4>(outputDesc, outputData);
1593 
1594  uint32_t padLeft = 1;
1595  uint32_t padTop = 1;
1596  uint32_t padRight = 1;
1597  uint32_t padBottom = 1;
1598  uint32_t strideX = 2;
1599  uint32_t strideY = 2;
1600 
1601  return SimpleConvolution2dNhwcTestImpl
1602  <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
1603  workloadFactory,
1604  memoryManager,
1605  tensorHandleFactory,
1606  input,
1607  kernel,
1608  boost::multi_array<float, 1>(),
1609  expectedOutput,
1610  dataLayout,
1611  1.0f,
1612  0,
1613  padLeft,
1614  padTop,
1615  padRight,
1616  padBottom,
1617  strideX,
1618  strideY);
1619 }
1620 
1621 //
1622 // DepthwiseConvolution2d implementations
1623 //
1624 
1625 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1626          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1627 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1628  armnn::IWorkloadFactory& workloadFactory,
1629  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1630  const armnn::ITensorHandleFactory& tensorHandleFactory,
1631  const boost::multi_array<T, 4>& input,
1632  const boost::multi_array<T, 4>& kernel,
1633  const boost::multi_array<B, 1>& bias,
1634  const boost::multi_array<T, 4>& outputExpected,
1635  float qScale,
1636  int32_t qOffset,
1637  const armnn::DataLayout layout,
1638  uint32_t padLeft = 0,
1639  uint32_t padTop = 0,
1640  uint32_t padRight = 0,
1641  uint32_t padBottom = 0,
1642  uint32_t strideX = 1,
1643  uint32_t strideY = 1)
1644 {
1645  unsigned int inputNum = armnn::numeric_cast<unsigned int>(input.shape()[0]);
1646  unsigned int inputChannels = armnn::numeric_cast<unsigned int>(input.shape()[1]);
1647  unsigned int inputHeight = armnn::numeric_cast<unsigned int>(input.shape()[2]);
1648  unsigned int inputWidth = armnn::numeric_cast<unsigned int>(input.shape()[3]);
1649  unsigned int kernelChanMul = armnn::numeric_cast<unsigned int>(kernel.shape()[0]);
1650  unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernel.shape()[1]);
1651  unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(kernel.shape()[2]);
1652  unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(kernel.shape()[3]);
1653  unsigned int outputNum = armnn::numeric_cast<unsigned int>(outputExpected.shape()[0]);
1654  unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1655  unsigned int outputHeight = armnn::numeric_cast<unsigned int>(outputExpected.shape()[2]);
1656  unsigned int outputWidth = armnn::numeric_cast<unsigned int>(outputExpected.shape()[3]);
1657 
1658  // If a bias is used, its size must equal the number of output channels.
1659  bool biasEnabled = bias.size() > 0;
1660  ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
1661 
1662  // Creates the tensors.
1663  armnn::TensorInfo inputTensorInfo =
1664  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1665  armnn::TensorInfo outputTensorInfo =
1666  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1667  armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
1668  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1669 
1670  // Set quantization parameters if the requested type is a quantized type.
1671  if (armnn::IsQuantizedType<T>())
1672  {
1673  inputTensorInfo.SetQuantizationScale(qScale);
1674  inputTensorInfo.SetQuantizationOffset(qOffset);
1675  outputTensorInfo.SetQuantizationScale(qScale);
1676  outputTensorInfo.SetQuantizationOffset(qOffset);
1677  kernelDesc.SetQuantizationScale(qScale);
1678  kernelDesc.SetQuantizationOffset(qOffset);
1679  biasDesc.SetQuantizationScale(qScale*qScale);
1680  biasDesc.SetQuantizationOffset(0);
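    // The bias scale is qScale * qScale because a quantized bias is added straight into the
    // int32 accumulator of (input x weight) products, whose combined scale is
    // inputScale * weightScale (both equal to qScale here); the bias offset must be 0.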
1681  }
1682 
1683  // Construct the input data.
1684  std::vector<T> inputData;
1685  inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1686 
 1687  // At this point, permute the input data if the requested layout requires it.
1688  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
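    // A PermutationVector maps source dimension i to destination dimension mappings[i]:
    // { 0, 3, 1, 2 } sends N->0, C->3, H->1, W->2, turning an NCHW tensor into NHWC.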
1689  if (layout == armnn::DataLayout::NHWC)
1690  {
1691  std::vector<T> tmp(inputData.size());
1692  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1693  inputData = tmp;
1694  }
1695 
1696  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
1697 
1698  // Construct the output data, with bias applied, as appropriate.
1699  std::vector<T> outputData;
1700  outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1701  if (biasEnabled)
1702  {
1703  std::vector<T> biasV;
1704  biasV.assign(bias.data(), bias.data() + outputChannels);
1705  ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1706  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1707  outputWidth, outputHeight);
1708  }
1709 
1710  LayerTestResult<T, 4> ret(outputTensorInfo);
1711 
 1712  // At this point, permute the expected output if the requested layout requires it.
1713  if (layout == armnn::DataLayout::NHWC)
1714  {
1715  std::vector<T> tmp(outputData.size());
1716  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1717  outputData = tmp;
1718  }
1719 
1720  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
1721 
1722  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1723  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1724 
1725  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1726 
1727  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1728 
1729  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1730  if (biasEnabled)
1731  {
1732  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1733  }
1734 
 1735  armnn::DepthwiseConvolution2dQueueDescriptor data;
1736  data.m_Weight = &weightsTensor;
 1737  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - leaving it unset has been a source of bugs.
1738  data.m_Parameters.m_StrideX = strideX;
1739  data.m_Parameters.m_StrideY = strideY;
1740  data.m_Parameters.m_PadLeft = padLeft;
1741  data.m_Parameters.m_PadRight = padRight;
1742  data.m_Parameters.m_PadTop = padTop;
1743  data.m_Parameters.m_PadBottom = padBottom;
1744  data.m_Parameters.m_BiasEnabled = biasEnabled;
1745  data.m_Parameters.m_DataLayout = layout;
1746 
 1747  armnn::WorkloadInfo info;
1748  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1749  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1750 
1751  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1752  inputHandle->Allocate();
1753  outputHandle->Allocate();
1754 
1755  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
1756 
1757  ExecuteWorkload(*workload, memoryManager);
1758 
1759  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1760 
1761  return ret;
1762 }
1763 
1764 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
 1765 LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1766  armnn::IWorkloadFactory& workloadFactory,
 1767  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1768  const armnn::ITensorHandleFactory& tensorHandleFactory,
1769  float qScale,
1770  int32_t qOffset,
1771  bool biasEnabled,
1772  const armnn::DataLayout layout)
1773 {
 1774  using B = armnn::ResolveType<ArmnnBType>;
 1775 
1776  unsigned int inputHeight = 3;
1777  unsigned int inputWidth = 3;
1778  unsigned int inputChannels = 2;
1779  unsigned int inputNum = 1;
1780 
1781  unsigned int kernelHeight = 3;
1782  unsigned int kernelWidth = 3;
1783  unsigned int kernelChannels = inputChannels;
1784  unsigned int kernelDepthMultiplier = 1;
1785 
1786  unsigned int outputHeight = 1;
1787  unsigned int outputWidth = 1;
1788  unsigned int outputChannels = kernelChannels;
1789  unsigned int outputNum = inputNum;
1790 
1791  armnn::TensorInfo inputTensorInfo =
1792  armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1793  armnn::TensorInfo outputTensorInfo =
1794  armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1795  armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
1796  ArmnnType);
1797  armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1798 
1799  // Set quantization parameters if the requested type is a quantized type.
1800  if(armnn::IsQuantizedType<T>())
1801  {
1802  inputTensorInfo.SetQuantizationScale(qScale);
1803  inputTensorInfo.SetQuantizationOffset(qOffset);
1804  outputTensorInfo.SetQuantizationScale(qScale);
1805  outputTensorInfo.SetQuantizationOffset(qOffset);
1806  kernelDesc.SetQuantizationScale(qScale);
1807  kernelDesc.SetQuantizationOffset(qOffset);
1808  biasDesc.SetQuantizationScale(qScale*qScale);
1809  biasDesc.SetQuantizationOffset(0);
1810  }
1811  std::vector<T> inputData = std::vector<T>(
1812  QuantizedVector<T>({
1813  1.f, 2.f, 1.f,
1814  2.f, 1.f, 2.f,
1815  1.f, 2.f, 1.f,
1816 
1817  1.f, 2.f, 1.f,
1818  2.f, 1.f, 2.f,
1819  1.f, 2.f, 1.f,
1820  },
1821  inputTensorInfo.GetQuantizationScale(),
1822  inputTensorInfo.GetQuantizationOffset()));
1823 
 1824  // At this point, permute the input data if the requested layout requires it.
1825  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1826  if (layout == armnn::DataLayout::NHWC)
1827  {
1828  std::vector<T> tmp(inputData.size());
1829  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1830  inputData = tmp;
1831  }
1832  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1833 
1834  std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1835  biasDesc.GetQuantizationScale(),
1836  biasDesc.GetQuantizationOffset()));
1837 
1838  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
1839 
1840  std::vector<T> kernelData = std::vector<T>(
1841  QuantizedVector<T>({
1842  1.f, 0.f, 1.f,
1843  0.f, 0.f, 0.f,
1844  -1.f, 0.f, -1.f,
1845 
1846  1.f, 0.f, 1.f,
1847  0.f, 0.f, 0.f,
1848  -1.f, 0.f, -1.f,
1849  },
1850  kernelDesc.GetQuantizationScale(),
1851  kernelDesc.GetQuantizationOffset()));
1852 
1853  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
1854 
1855  // Manually calculated.
1856  std::vector<T> outputImage(
1857  QuantizedVector<T>({ 0.f, 0.f },
1858  outputTensorInfo.GetQuantizationScale(),
1859  outputTensorInfo.GetQuantizationOffset())
1860  );
1861 
1862  // Optionally apply bias to output image.
1863  if(biasEnabled)
1864  {
1865  ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1866  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1867  outputWidth, outputHeight);
1868  }
1869 
1870  LayerTestResult<T, 4> ret(outputTensorInfo);
1871  if (layout == armnn::DataLayout::NHWC)
1872  {
1873  std::vector<T> tmp(outputImage.size());
1874  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1875  outputImage = tmp;
1876  }
1877 
1878  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
1879 
1880  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1881  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1882 
 1883  armnn::DepthwiseConvolution2dQueueDescriptor data;
 1884  armnn::WorkloadInfo info;
1885  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
1886  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
1887 
1888  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
1889  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
1890 
1891  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1892  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1893 
1894  data.m_Weight = &weightsTensor;
1895  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
1896  data.m_Parameters.m_StrideX = 1;
1897  data.m_Parameters.m_StrideY = 1;
1898  data.m_Parameters.m_PadLeft = 0;
1899  data.m_Parameters.m_PadRight = 0;
1900  data.m_Parameters.m_PadTop = 0;
1901  data.m_Parameters.m_PadBottom = 0;
1902  data.m_Parameters.m_BiasEnabled = biasEnabled;
1903  data.m_Parameters.m_DataLayout = layout;
1904 
1905  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
1906  inputHandle->Allocate();
1907  outputHandle->Allocate();
1908 
1909  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
1910 
1911  ExecuteWorkload(*workload, memoryManager);
1912 
1913  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
1914 
1915  return ret;
1916 }
1917 
1918 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
 1919 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
1920  armnn::IWorkloadFactory& workloadFactory,
 1921  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1922  const armnn::ITensorHandleFactory& tensorHandleFactory,
1923  float qScale,
1924  int32_t qOffset,
1925  bool biasEnabled,
1926  const armnn::DataLayout layout)
1927 {
 1928  using B = armnn::ResolveType<ArmnnBType>;
 1929 
1930  unsigned int depthMultiplier = 2;
1931 
1932  unsigned int inputHeight = 8;
1933  unsigned int inputWidth = 16;
1934  unsigned int inputChannels = 2;
1935  unsigned int inputBatchSize = 1;
1936 
1937  unsigned int kernelHeight = 5;
1938  unsigned int kernelWidth = 3;
1939 
1940  unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
1941  unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
1942  unsigned int outputChannels = inputChannels * depthMultiplier;
1943  unsigned int outputBatchSize = inputBatchSize;
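    // These sizes already bake in the workload parameters set further down: the height uses
    // strideY = 1 with padTop = padBottom = 1 (8 - 5 + 1 + 2 = 6), and the width uses
    // strideX = 2 with no horizontal padding ((16 - 3 + 1) / 2 = 7).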
1944 
1945  armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
1946  inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1947  armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
1948  outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1949  armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
1950  ArmnnType);
1951  armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
1952 
1953  // Set quantization parameters if the requested type is a quantized type.
1954  if(armnn::IsQuantizedType<T>())
1955  {
1956  inputTensorInfo.SetQuantizationScale(qScale);
1957  inputTensorInfo.SetQuantizationOffset(qOffset);
1958  outputTensorInfo.SetQuantizationScale(qScale);
1959  outputTensorInfo.SetQuantizationOffset(qOffset);
1960  kernelDesc.SetQuantizationScale(qScale);
1961  kernelDesc.SetQuantizationOffset(qOffset);
1962  biasDesc.SetQuantizationScale(qScale*qScale);
1963  biasDesc.SetQuantizationOffset(0);
1964  }
1965 
1966  // NOTE: originalInputData is in NCHW format
1967  std::vector<T> originalInputData = std::vector<T>(
1968  QuantizedVector<T>({
1969  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1970  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1971  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1972  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1973  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1974  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1975  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1976  0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
1977  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1978  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1979  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1980  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1981  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1982  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1983  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1984  0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
1985  },
1986  inputTensorInfo.GetQuantizationScale(),
1987  inputTensorInfo.GetQuantizationOffset()));
1988 
1989  std::vector<T> inputData = originalInputData;
 1990  // At this point, permute the input data if the requested layout requires it.
1991  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1992  if (layout == armnn::DataLayout::NHWC)
1993  {
1994  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
1995  originalInputData.data(), inputData.data(), sizeof(T));
1996  }
1997  auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
1998 
1999  std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
2000  biasDesc.GetQuantizationScale(),
2001  biasDesc.GetQuantizationOffset());
2002 
2003  auto bias = MakeTensor<B, 1>(biasDesc, biasV);
2004 
2005  std::vector<T> kernelData = std::vector<T>(
2006  QuantizedVector<T>({
2007  1, 1, 1,
2008  1, -1, 1,
2009  1, 1, 1,
2010  1, 1, 1,
2011  1, 1, 1,
2012 
2013  2, 2, 2,
2014  2, 2, 2,
2015  2, 2, 2,
2016  2, 2, 2,
2017  2, 2, 2,
2018 
2019  0, 0, 0,
2020  0, -1, 0,
2021  0, 0, 0,
2022  0, 0, 0,
2023  0, 0, 0,
2024 
2025  0, 0, 0,
2026  0, 0, 0,
2027  0, 1, 0,
2028  0, 0, 0,
2029  0, 0, 0
2030  },
2031  kernelDesc.GetQuantizationScale(),
2032  kernelDesc.GetQuantizationOffset()));
2033 
2034  auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
2035 
2036  // Manually calculated.
2037  std::vector<T> originalOutputImage = std::vector<T>(
2038  QuantizedVector<T>({
2039  3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
2040  6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
2041  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
2042  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
2043  6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
2044  5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
2045 
2046  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2047  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2048  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2049  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2050  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2051  -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
2052 
2053  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2054  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2055  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2056  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2057  10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2058  8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2059 
2060  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2061  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2062  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2063  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2064  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2065  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
2066  },
2067  outputTensorInfo.GetQuantizationScale(),
2068  outputTensorInfo.GetQuantizationOffset()));
2069 
2070  // Optionally apply bias to output image.
2071  if(biasEnabled)
2072  {
2073  ApplyBias(originalOutputImage,
2074  outputTensorInfo.GetQuantizationScale(),
2075  outputTensorInfo.GetQuantizationOffset(),
2076  biasV,
2077  biasDesc.GetQuantizationScale(),
2078  biasDesc.GetQuantizationOffset(),
2079  outputWidth,
2080  outputHeight);
2081  }
2082 
2083  LayerTestResult<T, 4> ret(outputTensorInfo);
2084  std::vector<T> outputImage = originalOutputImage;
2085  if (layout == armnn::DataLayout::NHWC)
2086  {
2087  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
2088  originalOutputImage.data(), outputImage.data(), sizeof(T));
2089  }
2090 
2091  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
2092 
2093  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
2094  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2095 
 2096  armnn::DepthwiseConvolution2dQueueDescriptor data;
 2097  armnn::WorkloadInfo info;
2098  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2099  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2100 
2101  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2102  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2103 
2104  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2105  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2106 
2107  data.m_Weight = &weightsTensor;
2108  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
2109  data.m_Parameters.m_StrideX = 2;
2110  data.m_Parameters.m_StrideY = 1;
2111  data.m_Parameters.m_PadLeft = 0;
2112  data.m_Parameters.m_PadRight = 0;
2113  data.m_Parameters.m_PadTop = 1;
2114  data.m_Parameters.m_PadBottom = 1;
2115  data.m_Parameters.m_BiasEnabled = biasEnabled;
2116  data.m_Parameters.m_DataLayout = layout;
2117 
2118  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2119  inputHandle->Allocate();
2120  outputHandle->Allocate();
2121 
2122  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2123 
2124  ExecuteWorkload(*workload, memoryManager);
2125 
2126  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2127 
2128  return ret;
2129 }
2130 
2131 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
 2132  typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
 2133 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
2134  armnn::IWorkloadFactory& workloadFactory,
 2135  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2136  const armnn::ITensorHandleFactory& tensorHandleFactory,
2137  const boost::multi_array<T, 4>& originalInput,
2138  const boost::multi_array<T, 4>& originalKernel,
2139  const boost::multi_array<B, 1>& bias,
2140  const boost::multi_array<T, 4>& originalOutputExpected,
2141  float qScale,
2142  int32_t qOffset,
 2143  const armnn::DataLayout layout,
2144  uint32_t padLeft = 0,
2145  uint32_t padTop = 0,
2146  uint32_t padRight = 0,
2147  uint32_t padBottom = 0,
2148  uint32_t strideX = 1,
2149  uint32_t strideY = 1,
2150  uint32_t dilationX = 1,
2151  uint32_t dilationY = 1)
2152 {
2153  unsigned int inputHeight = armnn::numeric_cast<unsigned int>(originalInput.shape()[2]);
2154  unsigned int inputWidth = armnn::numeric_cast<unsigned int>(originalInput.shape()[3]);
2155  unsigned int inputChannels = armnn::numeric_cast<unsigned int>(originalInput.shape()[1]);
2156  unsigned int inputNum = armnn::numeric_cast<unsigned int>(originalInput.shape()[0]);
2157 
2158  unsigned int outputHeight = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
2159  unsigned int outputWidth = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
2160  unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
2161  unsigned int outputNum = armnn::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
2162 
2163  unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(originalKernel.shape()[2]);
2164  unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(originalKernel.shape()[3]);
2165  unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernel.shape()[1]);
2166  unsigned int kernelDepthMul = armnn::numeric_cast<unsigned int>(originalKernel.shape()[0]);
2167 
2168  bool biasEnabled = bias.size() > 0;
2169 
2170  // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
2171  ARMNN_ASSERT(inputNum == 1);
2172  ARMNN_ASSERT(outputNum == 1);
2173 
2174  // If a bias is used, its size must equal the number of output channels.
2175  ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
2176 
2177 
2178  // Note these tensors will use two (identical) batches.
2179  armnn::TensorInfo inputTensorInfo =
2180  armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
2181  armnn::TensorInfo outputTensorInfo =
2182  armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
2183 
 2184  // For depthwise convolution the kernel is always laid out as NCHW-style [M, C, H, W], regardless of the data layout of the input and output.
2185  armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
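    // With an [M, C, H, W] kernel the depthwise output has M * C channels, so
    // outputChannels is expected to equal kernelDepthMul * kernelChannels.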
2186 
2187  armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
2188 
2189  // Set quantization parameters if the requested type is a quantized type.
2190  if(armnn::IsQuantizedType<T>())
2191  {
2192  inputTensorInfo.SetQuantizationScale(qScale);
2193  inputTensorInfo.SetQuantizationOffset(qOffset);
2194  outputTensorInfo.SetQuantizationScale(qScale);
2195  outputTensorInfo.SetQuantizationOffset(qOffset);
2196  kernelDesc.SetQuantizationScale(qScale);
2197  kernelDesc.SetQuantizationOffset(qOffset);
2198  biasDesc.SetQuantizationScale(qScale*qScale);
2199  biasDesc.SetQuantizationOffset(0);
2200  }
2201 
2202  LayerTestResult<T, 4> ret(outputTensorInfo);
2203 
2204  // Construct input data
2205  std::vector<T> input;
2206  input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
2207  std::vector<T> inputData;
2208  inputData.insert(inputData.end(), input.begin(), input.end());
2209  inputData.insert(inputData.end(), input.begin(), input.end());
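    // The single source batch is appended twice to fill the 2 * inputNum batch dimension
    // declared above; the expected output is duplicated the same way further down.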
2210 
 2211  // At this point, permute the input data if the requested layout requires it.
2212  const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
2213  if (layout == armnn::DataLayout::NHWC)
2214  {
2215  std::vector<T> tmp(inputData.size());
2216  armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
2217  inputData = tmp;
2218  }
2219 
2220  auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
2221 
2222  std::vector<T> output;
2223  output.assign(originalOutputExpected.data(),
2224  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
2225 
2226  // Apply bias to output data if it is enabled.
2227  if(biasEnabled)
2228  {
2229  std::vector<T> biasV;
2230  biasV.assign(bias.data(), bias.data() + outputChannels);
2231  ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
2232  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
2233  outputWidth, outputHeight);
2234  }
2235 
2236  // Construct expected output data
2237  std::vector<T> outputData;
2238  outputData.insert(outputData.end(), output.begin(), output.end());
2239  outputData.insert(outputData.end(), output.begin(), output.end());
2240 
 2241  // At this point, permute the expected output if the requested layout requires it.
2242  if (layout == armnn::DataLayout::NHWC)
2243  {
2244  std::vector<T> tmp(outputData.size());
2245  armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
2246  outputData = tmp;
2247  }
2248  ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
2249 
2250  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
2251  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2252 
 2253  armnn::DepthwiseConvolution2dQueueDescriptor data;
 2254  armnn::WorkloadInfo info;
2255  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
2256  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
2257 
2258  boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
2259  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
2260 
2261  if(biasEnabled)
2262  {
2263  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
2264  }
2265 
2266  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2267  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2268 
2269  data.m_Weight = &weightsTensor;
 2270  data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - leaving it unset can be a source of bugs.
2271  data.m_Parameters.m_StrideX = strideX;
2272  data.m_Parameters.m_StrideY = strideY;
2273  data.m_Parameters.m_PadLeft = padLeft;
2274  data.m_Parameters.m_PadRight = padRight;
2275  data.m_Parameters.m_PadTop = padTop;
2276  data.m_Parameters.m_PadBottom = padBottom;
2277  data.m_Parameters.m_BiasEnabled = biasEnabled;
2278  data.m_Parameters.m_DataLayout = layout;
2279  data.m_Parameters.m_DilationX = dilationX;
2280  data.m_Parameters.m_DilationY = dilationY;
2281 
2282  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
2283  inputHandle->Allocate();
2284  outputHandle->Allocate();
2285 
2286  CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
2287 
2288  ExecuteWorkload(*workload, memoryManager);
2289 
2290  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2291 
2292  return ret;
2293 }
2294 
2295 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2296  typename T = armnn::ResolveType<ArmnnType>>
 2297 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2298  armnn::IWorkloadFactory& workloadFactory,
 2299  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2300  const armnn::ITensorHandleFactory& tensorHandleFactory,
2301  float qScale,
2302  int32_t qOffset,
2303  bool biasEnabled,
2304  const armnn::DataLayout layout)
2305 {
2306  // Use a single-batch 2-channel 5x5 image as input.
2307  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2308  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2309  QuantizedVector<T>({
2310  0, 1, 2, 3, 4,
2311  5, 6, 7, 8, 9,
2312  10, 11, 12, 13, 14,
2313  15, 16, 17, 18, 19,
2314  20, 21, 22, 23, 24,
2315 
2316  25, 26, 27, 28, 29,
2317  30, 31, 32, 33, 34,
2318  35, 36, 37, 38, 39,
2319  40, 41, 42, 43, 44,
2320  45, 46, 47, 48, 49
2321  },
2322  inputTensorInfo.GetQuantizationScale(),
2323  inputTensorInfo.GetQuantizationOffset())));
2324 
2325  // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2326  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2327  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2328  QuantizedVector<T>({
2329  32, 31, 30, 29,
2330  28, 27, 26, 25,
2331  24, 23, 22, 21,
2332  20, 19, 18, 17,
2333 
2334  16, 15, 14, 13,
2335  12, 11, 10, 9,
2336  8, 7, 6, 5,
2337  4, 3, 2, 1
2338  },
2339  kernelTensorInfo.GetQuantizationScale(),
2340  kernelTensorInfo.GetQuantizationOffset())));
2341 
2342  // Expected output is 1 batch of a 2-channel 5x5 image.
 2343  // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2344  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2345  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2346  QuantizedVector<T>({
2347  1062, 1580, 1850, 1530, 1117,
2348  2140, 3108, 3500, 2842, 2042,
2349  3580, 5068, 5460, 4342, 3062,
2350  3618, 5072, 5390, 4248, 2971,
2351  3074, 4282, 4510, 3533, 2457,
2352 
2353  1550, 2284, 2362, 1955, 1428,
2354  2910, 4206, 4342, 3528, 2536,
2355  3390, 4886, 5022, 4068, 2916,
2356  3566, 5056, 5182, 4133, 2922,
2357  3100, 4352, 4452, 3517, 2465
2358  },
2359  outputTensorInfo.GetQuantizationScale(),
2360  outputTensorInfo.GetQuantizationOffset())));
2361 
2362  return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2363  workloadFactory,
2364  memoryManager,
2365  tensorHandleFactory,
2366  input,
2367  kernel,
2368  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2369  expectedOutput,
2370  qScale,
2371  qOffset,
2372  layout,
2373  1, // Padding left.
2374  1, // Padding top.
2375  2, // Padding right.
2376  2, // Padding bottom.
2377  1, // strideX
2378  1); // strideY
2379 }
2380 
2381 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2382  typename T = armnn::ResolveType<ArmnnType>>
 2383 LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2384  armnn::IWorkloadFactory& workloadFactory,
 2385  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2386  const armnn::ITensorHandleFactory& tensorHandleFactory,
2387  float qScale,
2388  int32_t qOffset,
2389  bool biasEnabled)
2390 {
2391  auto layout = armnn::DataLayout::NHWC;
2392 
2393  armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2394  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2395  QuantizedVector<T>({
2396  0, 1, 2, 3, 4,
2397  5, 6, 7, 8, 9,
2398  10, 11, 12, 13, 14,
2399  15, 16, 17, 18, 19,
2400  20, 21, 22, 23, 24,
2401 
2402  25, 26, 27, 28, 29,
2403  30, 31, 32, 33, 34,
2404  35, 36, 37, 38, 39,
2405  40, 41, 42, 43, 44,
2406  45, 46, 47, 48, 49
2407  },
2408  inputTensorInfo.GetQuantizationScale(),
2409  inputTensorInfo.GetQuantizationOffset())));
2410 
2411  armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
2412  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2413  QuantizedVector<T>({
2414  32, 31, 30, 29,
2415  28, 27, 26, 25,
2416  24, 23, 22, 21,
2417  20, 19, 18, 17,
2418 
2419  16, 15, 14, 13,
2420  12, 11, 10, 9,
2421  8, 7, 6, 5,
2422  4, 3, 2, 1
2423  },
2424  kernelTensorInfo.GetQuantizationScale(),
2425  kernelTensorInfo.GetQuantizationOffset())));
2426 
2427  armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2428  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2429  QuantizedVector<T>({
2430  1062, 1580, 1850, 1530, 1117,
2431  2140, 3108, 3500, 2842, 2042,
2432  3580, 5068, 5460, 4342, 3062,
2433  3618, 5072, 5390, 4248, 2971,
2434  3074, 4282, 4510, 3533, 2457,
2435 
2436  1550, 2284, 2362, 1955, 1428,
2437  2910, 4206, 4342, 3528, 2536,
2438  3390, 4886, 5022, 4068, 2916,
2439  3566, 5056, 5182, 4133, 2922,
2440  3100, 4352, 4452, 3517, 2465
2441  },
2442  outputTensorInfo.GetQuantizationScale(),
2443  outputTensorInfo.GetQuantizationOffset())));
2444 
2445  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2446  workloadFactory,
2447  memoryManager,
2448  tensorHandleFactory,
2449  input,
2450  kernel,
2451  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2452  expectedOutput,
2453  qScale,
2454  qOffset,
2455  layout,
2456  1, // Padding left.
2457  1, // Padding top.
2458  2, // Padding right.
2459  2, // Padding bottom.
2460  1, // strideX
2461  1); // strideY
2462 }
2463 
2464 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2465  typename T = armnn::ResolveType<ArmnnType>>
 2466 LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2467  armnn::IWorkloadFactory& workloadFactory,
 2468  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2469  const armnn::ITensorHandleFactory& tensorHandleFactory,
2470  float qScale,
2471  int32_t qOffset,
2472  bool biasEnabled)
2473 {
2474  auto layout = armnn::DataLayout::NHWC;
2475 
2476  armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
2477  auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2478  QuantizedVector<T>({
2479  0, 0, 0, 0, 0, 0, 0, 0, 0,
2480  0, 0, 0, 0, 0, 0, 0, 0, 0,
2481  0, 0, 0, 0, 0, 0, 0, 0, 0,
2482  0, 0, 0, 1, 1, 1, 0, 0, 0,
2483  0, 0, 0, 1, 1, 1, 0, 0, 0,
2484  0, 0, 0, 1, 1, 1, 0, 0, 0,
2485  0, 0, 0, 0, 0, 0, 0, 0, 0,
2486  0, 0, 0, 0, 0, 0, 0, 0, 0,
2487  0, 0, 0, 0, 0, 0, 0, 0, 0
2488  },
2489  inputTensorInfo.GetQuantizationScale(),
2490  inputTensorInfo.GetQuantizationOffset())));
2491 
2492  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2493  auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
2494  QuantizedVector<T>({
2495  1, 2, 3,
2496  4, 5, 6,
2497  7, 8, 9
2498  },
2499  kernelTensorInfo.GetQuantizationScale(),
2500  kernelTensorInfo.GetQuantizationOffset())));
2501 
2502  uint32_t padLeft = 0;
2503  uint32_t padTop = 0;
2504  uint32_t padRight = 0;
2505  uint32_t padBottom = 0;
2506  uint32_t strideX = 1;
2507  uint32_t strideY = 1;
2508  uint32_t dilationX = 3;
2509  uint32_t dilationY = 3;
2510 
 2511  // Dilation 3 makes the effective kernel 7x7, so the 9x9 input yields a 3x3 output; only the centre tap (weight 5) ever overlaps the 3x3 block of ones, so every output element is 5.
2512  armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2513  boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
2514  QuantizedVector<T>({
2515  5, 5, 5,
2516  5, 5, 5,
2517  5, 5, 5
2518  },
2519  outputTensorInfo.GetQuantizationScale(),
2520  outputTensorInfo.GetQuantizationOffset())));
2521 
2522  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2523  workloadFactory,
2524  memoryManager,
2525  tensorHandleFactory,
2526  input,
2527  kernel,
2528  GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2529  expectedOutput,
2530  qScale,
2531  qOffset,
2532  layout,
2533  padLeft,
2534  padTop,
2535  padRight,
2536  padBottom,
2537  strideX,
2538  strideY,
2539  dilationX,
2540  dilationY);
2541 }
2542 
2543 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
 2544 LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2545  armnn::IWorkloadFactory& workloadFactory,
 2546  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2547  const armnn::ITensorHandleFactory& tensorHandleFactory,
2548  const std::vector<float>& inputNoQuantizedValues,
2549  armnn::TensorInfo& inputTensorInfo,
2550  const std::vector<float>& kernelNoQuantizedValues,
2551  armnn::TensorInfo& kernelTensorInfo,
2552  const std::vector<float>& outputExpectedNoQuantizedValues,
2553  armnn::TensorInfo& outputTensorInfo,
2554  uint32_t dilationX,
2555  uint32_t dilationY,
 2556  const armnn::DataLayout layout = armnn::DataLayout::NCHW,
2557  bool biasEnabled = false)
2558 {
2559  float qScale;
2560  int32_t qOffset;
2561  switch (ArmnnType)
2562  {
 2563  case armnn::DataType::QAsymmS8:
 2564  case armnn::DataType::QAsymmU8:
2565  {
2566  qScale = 0.1f;
2567  qOffset = 128;
2568  break;
2569  }
2571  {
2572  qScale = 0.1f;
2573  qOffset = 0;
2574  break;
2575  }
 2576  case armnn::DataType::Float32:
2577  default:
2578  {
2579  qScale = 0.f;
2580  qOffset = 0;
2581  break;
2582  }
2583  }
2584 
2585  inputTensorInfo.SetQuantizationScale(qScale);
2586  inputTensorInfo.SetQuantizationOffset(qOffset);
2587  kernelTensorInfo.SetQuantizationScale(qScale);
2588  kernelTensorInfo.SetQuantizationOffset(qOffset);
2589  outputTensorInfo.SetQuantizationScale(qScale);
2590  outputTensorInfo.SetQuantizationOffset(qOffset);
2591 
2592  auto input = MakeTensor<T, 4>(inputTensorInfo,
2593  std::vector<T>(QuantizedVector<T>(inputNoQuantizedValues,
2594  inputTensorInfo.GetQuantizationScale(),
2595  inputTensorInfo.GetQuantizationOffset())));
2596  auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
2597  std::vector<T>(QuantizedVector<T>(kernelNoQuantizedValues,
2598  kernelTensorInfo.GetQuantizationScale(),
2599  kernelTensorInfo.GetQuantizationOffset())));
2600  auto expectedOutput =
2601  MakeTensor<T, 4>(outputTensorInfo,
2602  std::vector<T>(QuantizedVector<T>(outputExpectedNoQuantizedValues,
2603  outputTensorInfo.GetQuantizationScale(),
2604  outputTensorInfo.GetQuantizationOffset())));
2605 
2606  uint32_t padLeft = 0;
2607  uint32_t padTop = 0;
2608  uint32_t padRight = 0;
2609  uint32_t padBottom = 0;
2610  uint32_t strideX = 1;
2611  uint32_t strideY = 1;
2612 
2613  return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2614  workloadFactory,
2615  memoryManager,
2616  tensorHandleFactory,
2617  input,
2618  kernel,
2619  GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2620  expectedOutput,
2621  qScale,
2622  qOffset,
2623  layout,
2624  padLeft,
2625  padTop,
2626  padRight,
2627  padBottom,
2628  strideX,
2629  strideY,
2630  dilationX,
2631  dilationY);
2632 }
2633 
2634 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
 2635 LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2636  armnn::IWorkloadFactory& workloadFactory,
 2637  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2638  const armnn::ITensorHandleFactory& tensorHandleFactory,
2639  bool biasEnabled,
2640  const armnn::DataLayout layout)
2641 {
2642  armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2643  std::vector<float> inputNoQuantizedValues =
2644  {
2645  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2646  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2647  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2648  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2649  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2650  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2651  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2652  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2653  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2654  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2655  };
2656 
2657  armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
2658  std::vector<float> kernelNoQuantizedValues =
2659  {
2660  1, 2, 3,
2661  4, 5, 6,
2662  7, 8, 9
2663  };
2664 
 2665  // With a dilation of 3 the effective kernel size becomes 7x7 (K + (K - 1) * (D - 1) = 3 + 2 * 2),
 2666  // so the output is 4x4: (I - Keff + 2P) / S + 1 = (10 - 7 + 0) / 1 + 1 = 4.
2667  armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2668  std::vector<float> outputExpectedNoQuantizedValues =
2669  {
2670  6., 5., 5., 5.,
2671  6., 5., 5., 5.,
2672  6., 5., 5., 5.,
2673  3., 2., 2., 2.
2674  };
2675 
2676  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2677  workloadFactory,
2678  memoryManager,
2679  tensorHandleFactory,
2680  inputNoQuantizedValues,
2681  inputTensorInfo,
2682  kernelNoQuantizedValues,
2683  kernelTensorInfo,
2684  outputExpectedNoQuantizedValues,
2685  outputTensorInfo,
2686  3,
2687  3,
2688  layout,
2689  biasEnabled);
2690 }
2691 
2692 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
 2693 LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2694  armnn::IWorkloadFactory& workloadFactory,
 2695  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2696  const armnn::ITensorHandleFactory& tensorHandleFactory,
2697  bool biasEnabled,
2698  const armnn::DataLayout layout)
2699 {
2700  armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2701  std::vector<float> inputNoQuantizedValues =
2702  {
2703  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2704  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2705  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2706  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2707  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2708  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2709  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2710  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2711  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2712  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2713 
2714  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2715  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2716  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2717  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2718  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2719  0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2720  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2721  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2722  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2723  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2724  };
2725 
2726  armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
2727  std::vector<float> kernelNoQuantizedValues =
2728  {
2729  1, 2, 3,
2730  4, 5, 6,
2731  7, 8, 9,
2732 
2733  1, 2, 3,
2734  4, 5, 6,
2735  7, 8, 9
2736  };
2737 
 2738  // With a dilation of 3 the effective kernel size becomes 7x7 (K + (K - 1) * (D - 1) = 3 + 2 * 2),
 2739  // so each of the 2 channels gives a 4x4 output: (I - Keff + 2P) / S + 1 = (10 - 7 + 0) / 1 + 1 = 4.
2740  armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2741  std::vector<float> outputExpectedNoQuantizedValues =
2742  {
2743  6., 5., 5., 5.,
2744  6., 5., 5., 5.,
2745  6., 5., 5., 5.,
2746  3., 2., 2., 2.,
2747 
2748  6., 5., 5., 5.,
2749  6., 5., 5., 5.,
2750  6., 5., 5., 5.,
2751  3., 2., 2., 2.
2752  };
2753 
2754  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2755  workloadFactory,
2756  memoryManager,
2757  tensorHandleFactory,
2758  inputNoQuantizedValues,
2759  inputTensorInfo,
2760  kernelNoQuantizedValues,
2761  kernelTensorInfo,
2762  outputExpectedNoQuantizedValues,
2763  outputTensorInfo,
2764  3,
2765  3,
2766  layout,
2767  biasEnabled);
2768 }
2769 
2770 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
 2771 LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2772  armnn::IWorkloadFactory& workloadFactory,
 2773  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2774  const armnn::ITensorHandleFactory& tensorHandleFactory,
2775  bool biasEnabled,
2776  const armnn::DataLayout layout)
2777 {
2778  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2779  std::vector<float> inputNoQuantizedValues =
2780  {
2781  10.0, 10.0, 10.0,
2782  10.0, 10.0, 10.0,
2783  10.0, 10.0, 10.0,
2784 
2785  21.0, 22.0, 23.0,
2786  24.0, 25.0, 26.0,
2787  27.0, 28.0, 29.0
2788  };
2789 
2790  armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
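    // Depth multiplier 4 on 2 input channels: the kernel shape is [M, C, kH, kW] = [4, 2, 2, 2],
    // giving M * C = 8 output channels, which matches the 1x8x2x2 output tensor below.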
2791 
2792  std::vector<float> kernelNoQuantizedValues =
2793  {
2794  0.25f, 0.25f,
2795  0.25f, 0.25f,
2796 
2797  0.25f, 0.25f,
2798  0.25f, 0.25f,
2799 
2800  0.0f , 0.0f,
2801  0.0f , 0.1f,
2802 
2803  0.0f , 0.0f,
2804  0.0f , 0.1f,
2805 
2806  0.2f , 0.0f,
2807  0.0f , 0.0f,
2808 
2809  0.2f , 0.0f,
2810  0.0f , 0.0f,
2811 
2812  0.0f , 0.3f,
2813  0.0f , 0.0f,
2814 
2815  0.0f , 0.3f,
2816  0.0f , 0.0f
2817  };
2818 
2819  armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2820  std::vector<float> outputExpectedNoQuantizedValues =
2821  {
2822  10.f, 10.f,
2823  10.f, 10.f,
2824 
2825  1.f, 1.f,
2826  1.f, 1.f,
2827 
2828  2.f, 2.f,
2829  2.f, 2.f,
2830 
2831  3.f, 3.f,
2832  3.f, 3.f,
2833 
2834  23.f, 24.f,
2835  26.f, 27.f,
2836 
2837  2.5f, 2.6000001f,
2838  2.8f, 2.9f,
2839 
2840  4.2000003f, 4.4f,
2841  4.8f, 5.f,
2842 
2843  6.6000004f, 6.9f,
2844  7.5000005f, 7.8f
2845  };
2846 
2847 
2848  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2849  workloadFactory,
2850  memoryManager,
2851  tensorHandleFactory,
2852  inputNoQuantizedValues,
2853  inputTensorInfo,
2854  kernelNoQuantizedValues,
2855  kernelTensorInfo,
2856  outputExpectedNoQuantizedValues,
2857  outputTensorInfo,
2858  1,
2859  1,
2860  layout,
2861  biasEnabled);
2862 }
2863 
2864 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
 2865 LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2866  armnn::IWorkloadFactory& workloadFactory,
 2867  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2868  const armnn::ITensorHandleFactory& tensorHandleFactory,
2869  bool biasEnabled,
2870  const armnn::DataLayout layout)
2871 {
2872  armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2873  std::vector<float> inputNoQuantizedValues =
2874  {
2875  10.0, 10.0, 10.0,
2876  10.0, 10.0, 10.0,
2877  10.0, 10.0, 10.0,
2878 
2879  21.0, 22.0, 23.0,
2880  24.0, 25.0, 26.0,
2881  27.0, 28.0, 29.0
2882  };
2883 
2884  armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
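    // Depth multiplier 2 on 2 input channels: the kernel shape is [M, C, kH, kW] = [2, 2, 2, 2],
    // giving M * C = 4 output channels, which matches the 1x4x2x2 output tensor below.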
2885 
2886  std::vector<float> kernelNoQuantizedValues =
2887  {
2888  0.25f, 0.25f,
2889  0.25f, 0.25f,
2890 
2891  0.2f , 0.0f,
2892  0.0f , 0.0f,
2893 
2894  0.0f , 0.0f,
2895  0.0f , 0.1f,
2896 
2897  0.0f , 0.3f,
2898  0.0f , 0.0f
2899 
2900  };
2901 
2902  armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
2903  std::vector<float> outputExpectedNoQuantizedValues =
2904  {
2905  10.f, 10.f,
2906  10.f, 10.f,
2907 
2908  1.f, 1.f,
2909  1.f, 1.f,
2910 
2911  4.2000003f, 4.4f,
2912  4.8f, 5.f,
2913 
2914  6.6000004f, 6.9f,
2915  7.5000005f, 7.8f
2916  };
2917 
2918 
2919  return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2920  workloadFactory,
2921  memoryManager,
2922  tensorHandleFactory,
2923  inputNoQuantizedValues,
2924  inputTensorInfo,
2925  kernelNoQuantizedValues,
2926  kernelTensorInfo,
2927  outputExpectedNoQuantizedValues,
2928  outputTensorInfo,
2929  1,
2930  1,
2931  layout,
2932  biasEnabled);
2933 }
2934 
2935 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 2936 LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
2937  armnn::IWorkloadFactory& workloadFactory,
 2938  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2939  armnn::IWorkloadFactory& refWorkloadFactory,
2940  const armnn::ITensorHandleFactory& tensorHandleFactory,
2941  const armnn::ITensorHandleFactory& refTensorHandleFactory,
2942  const armnnUtils::DataLayoutIndexed& layout)
2943 {
2944  unsigned int inputHeight = 8;
2945  unsigned int inputWidth = 16;
2946  unsigned int inputChannels = 3;
2947  unsigned int inputNum = 5;
2948 
2949  unsigned int kernelHeight = 3;
2950  unsigned int kernelWidth = 3;
2951  unsigned int channelMultiplier = 1;
2952 
2953  unsigned int strideX = 2;
2954  unsigned int strideY = 3;
2955  unsigned int padX = 1;
2956  unsigned int padY = 1;
2957 
2958  unsigned int outputNum = inputNum;
2959  unsigned int outputChannels = inputChannels * channelMultiplier;
2960  unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
2961  unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
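    // (I + 2P - K + S) / S is the usual floor((I + 2P - K) / S) + 1 output-size formula with
    // the +1 folded into the numerator: height = (8 + 2 - 3 + 3) / 3 = 3 and
    // width = (16 + 2 - 3 + 2) / 2 = 8 under integer division.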
2962 
2963  armnn::TensorInfo inputTensorInfo;
2964  armnn::TensorInfo outputTensorInfo;
2965  armnn::TensorInfo kernelDesc;
2966  armnn::TensorInfo biasDesc;
2967 
2968 
2969  std::vector<unsigned int> inputShape;
2970  std::vector<unsigned int> outputShape;
2971  std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
2972  std::vector<unsigned int> biasShape{ outputChannels };
2973  switch (layout.GetDataLayout())
2974  {
 2975  case armnn::DataLayout::NCHW:
2976  inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
2977  outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
2978  break;
 2979  case armnn::DataLayout::NHWC:
2980  inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
2981  outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
2982  break;
2983  default:
2984  throw armnn::InvalidArgumentException("unknown data layout ["
2985  + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
2986  }
2987 
2988  float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
2989  float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
2990  int32_t qOffset = 0;
2991 
2992  inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
2993  outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
2994  kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
2995  biasDesc = armnn::TensorInfo(
2996  1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
2997 
2998  LayerTestResult<T, 4> ret(outputTensorInfo);
2999 
3000  auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f);
3001  auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f);
3002  auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(
3003  biasDesc, 1028, 0.0f, 255.0f);
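    // The constants 124908, 891234 and 1028 appear to be fixed RNG seeds: they make the
    // pseudo-random input, kernel and bias reproducible, so the workload under test and the
    // reference workload below consume identical data.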
3004 
3005  std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
3006  std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
3007 
 3008  armnn::DepthwiseConvolution2dQueueDescriptor data;
 3009  armnn::WorkloadInfo info;
3010  armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
3011  armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
3012 
3013  AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
3014  AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
3015 
3016  AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
3017  AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3018  data.m_Weight = &weightsTensor;
3019  data.m_Bias = &biasTensor;
3020  data.m_Parameters.m_StrideX = strideX;
3021  data.m_Parameters.m_StrideY = strideY;
3022  data.m_Parameters.m_PadLeft = padX;
3023  data.m_Parameters.m_PadRight = padX;
3024  data.m_Parameters.m_PadTop = padY;
3025  data.m_Parameters.m_PadBottom = padY;
3026  data.m_Parameters.m_BiasEnabled = true;
3027  data.m_Parameters.m_DataLayout = layout.GetDataLayout();
3028 
3029  std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
3030  std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
3031 
3033  armnn::WorkloadInfo refInfo = info;
3034  SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
3035  SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
3036 
3037  std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
3038  std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo);
3039 
3040  outputHandleRef->Allocate();
3041  inputHandleRef->Allocate();
3042 
3043  inputHandle->Allocate();
3044  outputHandle->Allocate();
3045 
3046  CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3047  CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
3048 
3049  ExecuteWorkload(*workload, memoryManager);
3050 
3051  workloadRef->PostAllocationConfigure();
3052  workloadRef->Execute();
3053 
3054  CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
3055  CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
3056 
3057  return ret;
3058 }
3059 
3060 //
3061 // Explicit template specializations
3062 //
 3063 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
 3064 Convolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
 3065  armnn::IWorkloadFactory&,
 3066  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
 3067  const armnn::ITensorHandleFactory&,
 3068  bool,
 3069  const armnn::DataLayout);
3070 
 3071 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3072 Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3073  armnn::IWorkloadFactory&,
3074  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3075  const armnn::ITensorHandleFactory&,
3076  bool,
 3077  const armnn::DataLayout);
3078 
 3079 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3080 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3081  armnn::IWorkloadFactory&,
3082  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3083  const armnn::ITensorHandleFactory&,
3084  bool,
 3085  const armnn::DataLayout);
3086 
 3087 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3088 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3089  armnn::IWorkloadFactory&,
3090  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3091  const armnn::ITensorHandleFactory&,
3092  bool,
 3093  const armnn::DataLayout);
3094 
 3095 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3096 Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3097  armnn::IWorkloadFactory&,
3098  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3099  const armnn::ITensorHandleFactory&,
3100  bool,
 3101  const armnn::DataLayout);
3102 
3103 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3104 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3105  armnn::IWorkloadFactory&,
3106  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3107  const armnn::ITensorHandleFactory&,
3108  bool,
 3109  const armnn::DataLayout);
3110 
 3111 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3112 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3113  armnn::IWorkloadFactory&,
3114  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3115  const armnn::ITensorHandleFactory&,
3116  bool,
 3117  const armnn::DataLayout);
3118 
3119 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3120 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3121  armnn::IWorkloadFactory&,
3122  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3123  const armnn::ITensorHandleFactory&,
3124  bool,
 3125  const armnn::DataLayout);
3126 
3127 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3128 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3129  armnn::IWorkloadFactory&,
3130  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3131  const armnn::ITensorHandleFactory&,
3132  bool,
 3133  const armnn::DataLayout);
3134 
3135 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3136 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3137  armnn::IWorkloadFactory&,
3138  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3139  const armnn::ITensorHandleFactory&,
3140  bool,
 3141  const armnn::DataLayout);
3142 
3143 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3144 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3145  armnn::IWorkloadFactory &workloadFactory,
3146  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3147  const armnn::ITensorHandleFactory& tensorHandleFactory,
3148  bool biasEnabled,
3149  const armnn::DataLayout layout);
3150 
3151 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3152 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3153  armnn::IWorkloadFactory &workloadFactory,
3154  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3155  const armnn::ITensorHandleFactory& tensorHandleFactory,
3156  bool biasEnabled,
3157  const armnn::DataLayout layout);
3158 
3159 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3160 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3161  armnn::IWorkloadFactory &workloadFactory,
3162  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3163  const armnn::ITensorHandleFactory& tensorHandleFactory,
3164  bool biasEnabled,
3165  const armnn::DataLayout layout);
3166 
3167 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3168 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3169  armnn::IWorkloadFactory &workloadFactory,
3170  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3171  const armnn::ITensorHandleFactory& tensorHandleFactory,
3172  bool biasEnabled,
3173  const armnn::DataLayout layout);
3174 
3175 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3176 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3177  armnn::IWorkloadFactory &workloadFactory,
3178  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3179  const armnn::ITensorHandleFactory& tensorHandleFactory,
3180  bool biasEnabled,
3181  const armnn::DataLayout layout);
3182 
3183 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3184 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3185  armnn::IWorkloadFactory&,
3186  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3187  const armnn::ITensorHandleFactory&,
3188  bool,
 3189  const armnn::DataLayout);
3190 
3191 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3192 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3193  armnn::IWorkloadFactory&,
3194  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3195  const armnn::ITensorHandleFactory&,
3196  bool,
 3197  const armnn::DataLayout);
3198 
3199 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3200 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3201  armnn::IWorkloadFactory&,
3202  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3203  const armnn::ITensorHandleFactory&,
3204  bool,
 3205  const armnn::DataLayout);
3206 
3207 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3208 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3209  armnn::IWorkloadFactory&,
3210  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3211  const armnn::ITensorHandleFactory&,
3212  bool,
 3213  const armnn::DataLayout);
3214 
3215 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3216 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3217  armnn::IWorkloadFactory&,
3218  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3219  const armnn::ITensorHandleFactory&,
3220  bool,
 3221  const armnn::DataLayout);
3222 
3223 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3224 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3225  armnn::IWorkloadFactory&,
3226  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3227  const armnn::ITensorHandleFactory&,
3228  bool,
 3229  const armnn::DataLayout);
3230 
3231 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3232 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3233  armnn::IWorkloadFactory&,
3234  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3235  const armnn::ITensorHandleFactory&,
3236  bool,
3237  const armnn::DataLayout);
3238 
3239 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3240 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3241  armnn::IWorkloadFactory&,
3242  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3243  const armnn::ITensorHandleFactory&,
3244  bool,
3245  const armnn::DataLayout);
3246 
3247 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3248 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3249  armnn::IWorkloadFactory&,
3250  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3251  const armnn::ITensorHandleFactory&,
3252  bool,
3253  const armnn::DataLayout);
3254 
3255 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3256 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3257  armnn::IWorkloadFactory&,
3258  const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3259  const armnn::ITensorHandleFactory&,
3260  bool,
3261  const armnn::DataLayout);
3262 
3263 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3264 DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3265  armnn::IWorkloadFactory &workloadFactory,
3266  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3267  const armnn::ITensorHandleFactory& tensorHandleFactory,
3268  bool biasEnabled,
3269  const armnn::DataLayout layout);
3270 
3271 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3272 DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3273  armnn::IWorkloadFactory &workloadFactory,
3274  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3275  const armnn::ITensorHandleFactory& tensorHandleFactory,
3276  bool biasEnabled,
3277  const armnn::DataLayout layout);
3278 
3279 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3280 DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3281  armnn::IWorkloadFactory &workloadFactory,
3282  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3283  const armnn::ITensorHandleFactory& tensorHandleFactory,
3284  bool biasEnabled,
3285  const armnn::DataLayout layout);
3286 
3287 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3288 DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3289  armnn::IWorkloadFactory &workloadFactory,
3290  const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3291  const armnn::ITensorHandleFactory& tensorHandleFactory,
3292  bool biasEnabled,
3293  const armnn::DataLayout layout);
3294 
3295 //
3296 // Implementation functions
3297 //
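// The wrappers below are thin, non-template entry points for the unit tests:
// each binds a concrete data type (and, for the quantized variants, a
// representative scale/offset pair) to one of the templated *TestCommon /
// *TestImpl helpers defined earlier in this file.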
3298 
3299 LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
3300  armnn::IWorkloadFactory& workloadFactory,
3301  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3302  const armnn::ITensorHandleFactory& tensorHandleFactory,
3303  bool biasEnabled,
3304  const armnn::DataLayout layout)
3305 {
3306  return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3307  workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout);
3308 }
3309 
3310 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
3311  armnn::IWorkloadFactory& workloadFactory,
3312  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3313  const armnn::ITensorHandleFactory& tensorHandleFactory,
3314  bool biasEnabled,
3315  const armnn::DataLayout layout)
3316 {
3317  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3318  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3319 }
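// For the quantized wrappers, real = scale * (quantized - offset); a scale of
// 0.5f with offset 50 therefore maps the QAsymmU8 range [0, 255] onto
// roughly [-25.0, 102.5].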
3320 
3321 LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
3322  armnn::IWorkloadFactory& workloadFactory,
3323  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3324  const armnn::ITensorHandleFactory& tensorHandleFactory,
3325  bool biasEnabled,
3326  const armnn::DataLayout layout)
3327 {
3328  return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3329  workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout);
3330 }
3331 
3332 LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
3333  armnn::IWorkloadFactory& workloadFactory,
3334  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3335  const armnn::ITensorHandleFactory& tensorHandleFactory,
3336  bool biasEnabled)
3337 {
3338  return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
3339  workloadFactory,
3340  memoryManager,
3341  tensorHandleFactory,
3342  0.f,
3343  0,
3344  biasEnabled,
3345  armnn::DataLayout::NHWC);
3346 }
3347 
3348 LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
3349  armnn::IWorkloadFactory& workloadFactory,
3350  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3351  const armnn::ITensorHandleFactory& tensorHandleFactory,
3352  bool biasEnabled,
3353  const armnn::DataLayout layout)
3354 {
3355  return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
3356  workloadFactory,
3357  memoryManager,
3358  tensorHandleFactory,
3359  0.f,
3360  0,
3361  biasEnabled,
3362  layout);
3363 }
3364 
3365 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
3366  armnn::IWorkloadFactory& workloadFactory,
3367  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3368  const armnn::ITensorHandleFactory& tensorHandleFactory,
3369  bool biasEnabled,
3370  const armnn::DataLayout layout)
3371 {
3372  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3373  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3374 }
3375 
3376 LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
3377  armnn::IWorkloadFactory& workloadFactory,
3378  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3379  const armnn::ITensorHandleFactory& tensorHandleFactory,
3380  bool biasEnabled,
3381  const armnn::DataLayout layout)
3382 {
3383  return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3384  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3385 }
3386 
3387 LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
3388  armnn::IWorkloadFactory& workloadFactory,
3389  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3390  const armnn::ITensorHandleFactory& tensorHandleFactory,
3391  bool biasEnabled,
3392  const armnn::DataLayout layout)
3393 {
3394  return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3395  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3396 }
3397 
3398 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
3399  armnn::IWorkloadFactory& workloadFactory,
3400  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3401  const armnn::ITensorHandleFactory& tensorHandleFactory,
3402  armnn::DataLayout layout)
3403 {
3404  return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3405  workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0);
3406 }
3407 
3408 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
3409  armnn::IWorkloadFactory& workloadFactory,
3410  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3411  const armnn::ITensorHandleFactory& tensorHandleFactory,
3412  armnn::DataLayout layout)
3413 {
3414  return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
3415  <armnn::DataType::Float32, armnn::DataType::Float32>(
3416  workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0);
3417 }
3418 
3419 LayerTestResult<float, 4> Convolution1dTest(
3420  armnn::IWorkloadFactory& workloadFactory,
3421  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3422  const armnn::ITensorHandleFactory& tensorHandleFactory,
3423  bool biasEnabled)
3424 {
3425  return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3426  workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled);
3427 }
3428 
3429 LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
3430  armnn::IWorkloadFactory& workloadFactory,
3431  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3432  const armnn::ITensorHandleFactory& tensorHandleFactory,
3433  bool biasEnabled)
3434 {
3435  return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3436  workloadFactory, memoryManager, tensorHandleFactory, 0.1f, 128, biasEnabled);
3437 }
3438 
3439 LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
3440  armnn::IWorkloadFactory& workloadFactory,
3441  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3442  const armnn::ITensorHandleFactory& tensorHandleFactory,
3443  const armnn::DataLayout layout)
3444 {
3445  using namespace armnn;
3446 
3447  const DataType inputType = DataType::QAsymmU8;
3448  const DataType kernelType = DataType::QSymmS8;
3449  const DataType biasType = DataType::Signed32;
3450 
3451  TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
3452  TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
3453 
3454  const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
3455  constexpr unsigned int quantDimension = 0;
3456 
3457  TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
3458 
3459  const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
3460  TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
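    // The per-channel bias scales above follow the usual convention
    // biasScale[c] = inputScale * weightScale[c], i.e. 0.5 * { 0.5, 0.75, 1.0 }.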
3461 
3462  std::vector<uint8_t> inputData =
3463  {
3464  138, 108, 138, 108, 138, 108
3465  };
3466 
3467  std::vector<int8_t> kernelData =
3468  {
3469  1, 2, 1, 2, 1, 2
3470  };
3471 
3472  std::vector<int32_t> biasData =
3473  {
3474  4, 4, 4
3475  };
3476 
3477  std::vector<uint8_t> expectedOutputData =
3478  {
3479  121, 118, 115, 121, 118, 115, 121, 118, 115
3480  };
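    // Worked example for the first output value: input { 138, 108 } dequantizes
    // to { 5.0, -10.0 }; channel 0 weights { 1, 2 } at scale 0.5 give
    // 5.0 * 0.5 + (-10.0) * 1.0 = -7.5, plus bias 4 * 0.25 = 1.0 -> -6.5.
    // Re-quantizing with scale 1.0, offset 128 (rounding half away from zero)
    // yields 128 + round(-6.5) = 121. Channels 1 and 2 follow the same pattern
    // with weight scales 0.75 and 1.0.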
3481 
3482  if (layout == DataLayout::NCHW)
3483  {
3484  PermuteTensorNhwcToNchw(inputInfo, inputData);
3485  PermuteTensorNhwcToNchw(kernelInfo, kernelData);
3486  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3487  }
3488 
3489  Convolution2dDescriptor descriptor;
3490  descriptor.m_StrideX = 1;
3491  descriptor.m_StrideY = 1;
3492  descriptor.m_PadLeft = 0;
3493  descriptor.m_PadRight = 0;
3494  descriptor.m_PadTop = 0;
3495  descriptor.m_PadBottom = 0;
3496  descriptor.m_BiasEnabled = true;
3497  descriptor.m_DataLayout = layout;
3498 
3499 
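    // From here on this is the standard workload test pattern: package the
    // constant weights and bias in ScopedCpuTensorHandle, wire the input and
    // output handles into a WorkloadInfo, create the workload, execute it and
    // read the result back.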
3500  std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
3501  std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
3502 
3503 
3504  WorkloadInfo workloadInfo;
3505  ScopedCpuTensorHandle weightTensor(kernelInfo);
3506  ScopedCpuTensorHandle biasTensor(biasInfo);
3507 
3508  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3509  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3510 
3511  Convolution2dQueueDescriptor queueDescriptor;
3512  queueDescriptor.m_Parameters = descriptor;
3513  queueDescriptor.m_Weight = &weightTensor;
3514  queueDescriptor.m_Bias = &biasTensor;
3515 
3516  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3517  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3518 
3519  std::unique_ptr<IWorkload> workload = workloadFactory.CreateConvolution2d(queueDescriptor, workloadInfo);
3520  inputHandle->Allocate();
3521  outputHandle->Allocate();
3522 
3523  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3524 
3525  ExecuteWorkload(*workload, memoryManager);
3526 
3527  LayerTestResult<uint8_t, 4> ret(outputInfo);
3528  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3529  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3530 
3531  return ret;
3532 }
3533 
3534 LayerTestResult<float, 4> CompareConvolution2dTest(
3535  armnn::IWorkloadFactory& workloadFactory,
3536  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3537  armnn::IWorkloadFactory& refWorkloadFactory,
3538  const armnn::ITensorHandleFactory& tensorHandleFactory,
3539  const armnn::ITensorHandleFactory& refTensorHandleFactory)
3540 {
3541  return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
3542  workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory);
3543 }
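// The Compare* tests run the identical convolution on the factory under test
// and on a reference workload factory, using the reference output as the
// expected result.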
3544 
3545 LayerTestResult<float, 4> DepthwiseConvolution2dTest(
3546  armnn::IWorkloadFactory& workloadFactory,
3547  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3548  const armnn::ITensorHandleFactory& tensorHandleFactory,
3549  bool biasEnabled,
3550  const armnn::DataLayout layout)
3551 {
3552  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3553  workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
3554 }
3555 
3556 LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
3557  armnn::IWorkloadFactory& workloadFactory,
3558  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3559  const armnn::ITensorHandleFactory& tensorHandleFactory,
3560  bool biasEnabled)
3561 {
3562  return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3563  workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled);
3564 }
3565 
3566 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
3567  armnn::IWorkloadFactory& workloadFactory,
3568  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3569  const armnn::ITensorHandleFactory& tensorHandleFactory,
3570  bool biasEnabled,
3571  const armnn::DataLayout layout)
3572 {
3573  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3574  workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
3575 }
3576 
3577 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
3578  armnn::IWorkloadFactory& workloadFactory,
3579  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3580  const armnn::ITensorHandleFactory& tensorHandleFactory)
3581 {
3582  armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
3583  auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
3584 
3585  std::vector<float> kernelData;
3586  std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
3587  for (unsigned int i = 0; i < 64; ++i)
3588  {
3589  kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
3590  }
3591  armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
3592  auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
3593 
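    // Every output element is zero by construction: each 2x2 kernel
    // { 1, -1, -1, 1 } convolved with the input { 1, 2, 3, 4 } gives
    // 1 - 2 - 3 + 4 = 0.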
3594  std::vector<float> expectedOutputData(64, 0.f);
3595  armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
3596  auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
3597 
3598  return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3599  workloadFactory,
3600  memoryManager,
3601  tensorHandleFactory,
3602  input,
3603  kernel,
3604  boost::multi_array<float, 1>(),
3605  expectedOutput,
3606  0.f,
3607  0,
3608  armnn::DataLayout::NCHW);
3609 }
3610 
3611 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
3612  armnn::IWorkloadFactory& workloadFactory,
3613  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3614  const armnn::ITensorHandleFactory& tensorHandleFactory,
3615  bool biasEnabled,
3616  const armnn::DataLayout layout)
3617 {
3618  return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3619  workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
3620 }
3621 
3622 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
3623  armnn::IWorkloadFactory& workloadFactory,
3624  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3625  const armnn::ITensorHandleFactory& tensorHandleFactory,
3626  bool biasEnabled,
3627  const armnn::DataLayout layout)
3628 {
3629  return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3630  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3631 }
3632 
3633 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
3634  armnn::IWorkloadFactory& workloadFactory,
3635  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3636  const armnn::ITensorHandleFactory& tensorHandleFactory,
3637  bool biasEnabled,
3638  const armnn::DataLayout layout)
3639 {
3640  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3641  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3642 }
3643 
3644 LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
3645  armnn::IWorkloadFactory& workloadFactory,
3646  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3647  const armnn::ITensorHandleFactory& tensorHandleFactory)
3648 {
3649  return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3650  workloadFactory,
3651  memoryManager,
3652  tensorHandleFactory,
3653  0.f,
3654  0,
3655  false);
3656 }
3657 
3658 LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
3659  armnn::IWorkloadFactory& workloadFactory,
3660  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3661  const armnn::ITensorHandleFactory& tensorHandleFactory,
3662  bool biasEnabled,
3663  const armnn::DataLayout layout)
3664 {
3665  return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3666  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3667 }
3668 
3669 LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
3670  armnn::IWorkloadFactory& workloadFactory,
3671  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3672  const armnn::ITensorHandleFactory& tensorHandleFactory,
3673  bool biasEnabled,
3674  const armnn::DataLayout layout)
3675 {
3676  return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3677  workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3678 }
3679 
3680 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
3681  armnn::IWorkloadFactory& workloadFactory,
3682  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3683  const armnn::ITensorHandleFactory& tensorHandleFactory,
3684  const armnn::DataLayout layout)
3685 {
3686  using namespace armnn;
3687 
3688  const DataType inputType = DataType::QAsymmU8;
3689  const DataType kernelType = DataType::QSymmS8;
3690  const DataType biasType = DataType::Signed32;
3691 
3692  TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
3693  TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
3694 
3695  const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
3696  const unsigned int quantDimension = 0;
3697  TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
3698 
3699  const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
3700  constexpr unsigned int biasQuantDimension = 0;
3701  TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
3702 
3703  std::vector<uint8_t> inputData =
3704  {
3705  129, 130,
3706  129, 130,
3707  129, 130,
3708  129, 130,
3709  129, 130,
3710  129, 130,
3711  129, 130,
3712  129, 130,
3713  129, 130
3714  };
3715 
3716  std::vector<int8_t> kernelData =
3717  {
3718  1, 1, 1, 1,
3719  1, 1, 1, 1,
3720  1, 1, 1, 1,
3721  1, 1, 1, 1
3722  };
3723 
3724  std::vector<int32_t> biasData =
3725  {
3726  4, 4, 4, 4
3727  };
3728 
3729  std::vector<uint8_t> expectedOutputData =
3730  {
3731  132, 130, 134, 131,
3732  132, 130, 134, 131,
3733  132, 130, 134, 131,
3734  132, 130, 134, 131
3735  };
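    // Worked example for output channel 0: input channel 0 dequantizes to 0.5
    // (129 at scale 0.5, offset 128); a 2x2 window of all-ones weights at
    // scale 1.0 sums to 4 * 0.5 = 2.0, plus bias 4 * 0.5 = 2.0 -> 4.0, which
    // re-quantizes (scale 1.0, offset 128) to 132. The other channels use
    // weight scales { 0.5, 1.0, 0.5 } and input channel 1 where applicable.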
3736 
3737  if (layout == DataLayout::NCHW)
3738  {
3739  PermuteTensorNhwcToNchw(inputInfo, inputData);
3740  PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3741  }
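    // Note: unlike Convolution2dPerAxisQuantTest above, the depthwise kernel
    // is not permuted here; its [M, I, H, W] layout does not depend on the
    // data layout.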
3742 
3743  DepthwiseConvolution2dDescriptor descriptor;
3744  descriptor.m_StrideX = 1;
3745  descriptor.m_StrideY = 1;
3746  descriptor.m_PadLeft = 0;
3747  descriptor.m_PadRight = 0;
3748  descriptor.m_PadTop = 0;
3749  descriptor.m_PadBottom = 0;
3750  descriptor.m_DilationX = 1;
3751  descriptor.m_DilationY = 1;
3752  descriptor.m_BiasEnabled = true;
3753  descriptor.m_DataLayout = layout;
3754 
3755  std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
3756  std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
3757 
3758  WorkloadInfo workloadInfo;
3759  ScopedCpuTensorHandle weightTensor(kernelInfo);
3760  ScopedCpuTensorHandle biasTensor(biasInfo);
3761 
3762  AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
3763  AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
3764 
3765  DepthwiseConvolution2dQueueDescriptor queueDescriptor;
3766  queueDescriptor.m_Parameters = descriptor;
3767  queueDescriptor.m_Weight = &weightTensor;
3768  queueDescriptor.m_Bias = &biasTensor;
3769 
3770  AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3771  AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3772 
3773  std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
3774  inputHandle->Allocate();
3775  outputHandle->Allocate();
3776 
3777  CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3778 
3779  ExecuteWorkload(*workload, memoryManager);
3780 
3781  LayerTestResult<uint8_t, 4> ret(outputInfo);
3782 
3783  CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
3784  ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
3785 
3786  return ret;
3787 }
3788 
3789 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
3790  armnn::IWorkloadFactory& workloadFactory,
3791  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3792  armnn::IWorkloadFactory& refWorkloadFactory,
3793  const armnn::ITensorHandleFactory& tensorHandleFactory,
3794  const armnn::ITensorHandleFactory& refTensorHandleFactory,
3795  const armnn::DataLayout layout)
3796 {
3797  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
3798  workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
3799 }
3800 
3801 LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
3802  armnn::IWorkloadFactory& workloadFactory,
3803  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3804  armnn::IWorkloadFactory& refWorkloadFactory,
3805  const armnn::ITensorHandleFactory& tensorHandleFactory,
3806  const armnn::ITensorHandleFactory& refTensorHandleFactory,
3807  const armnn::DataLayout layout)
3808 {
3809  return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
3810  workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
3811 }