ArmNN 24.02
WorkloadUtils.hpp
//
// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "TensorHandle.hpp"
#include "ITensorHandle.hpp"
#include <armnn/Tensor.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/Permute.hpp>

#include <Half.hpp>
#include <Profiling.hpp>

namespace armnn
{
namespace
{

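// Helper that unpacks up to 'num' values from 'array' into the trailing
// reference arguments, reading the array from its last element backwards.
// It is used below to give the dimensions of a TensorShape names
// (channels first, then width, height, batches, depth).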
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }

    arg = array[(num - 1) - idx];
    idx++;
}

template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee);

    AssignValues(num, idx, array, args...);
}

} // anonymous namespace

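/// Copies the overlapping region of srcTensor into dstTensor, tolerating
/// differing shapes and strides; each contiguous run is copied with the
/// caller-supplied functor. An illustrative call (the memcpy lambda is an
/// example, not part of this header):
///     CopyTensorContentsGeneric(src, dst,
///         [](void* dst, const void* src, size_t size) { memcpy(dst, src, size); });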
template <typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
{
    // For ease of understanding, names are assigned to the dimensions
    // of the tensor as if NHWC; however, this routine works with any 5D tensor.
    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

    TensorShape srcStrides      = srcTensor->GetStrides();
    const TensorShape& srcShape = srcTensor->GetShape();
    const auto srcSize          = srcTensor->GetStrides()[0] * srcShape[0];
    TensorShape dstStrides      = dstTensor->GetStrides();
    const TensorShape& dstShape = dstTensor->GetShape();
    const auto dstSize          = dstTensor->GetStrides()[0] * dstShape[0];

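    // Unpack the shapes and strides into named per-dimension variables.
    // Dimensions a tensor does not have keep their defaults of 1 (extent)
    // and 0 (stride), so the copy loops below run once over them.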
    size_t srcDepth    = 1;
    size_t srcBatches  = 1;
    size_t srcHeight   = 1;
    size_t srcWidth    = 1;
    size_t srcChannels = 1;
    AssignValues(srcShape.GetNumDimensions(),
                 0,
                 srcShape,
                 srcChannels,
                 srcWidth,
                 srcHeight,
                 srcBatches,
                 srcDepth);

    size_t srcDepthStride   = 0;
    size_t srcBatchStride   = 0;
    size_t srcHeightStride  = 0;
    size_t srcWidthStride   = 0;
    size_t srcChannelStride = 0;
    AssignValues(srcStrides.GetNumDimensions(),
                 0,
                 srcStrides,
                 srcChannelStride,
                 srcWidthStride,
                 srcHeightStride,
                 srcBatchStride,
                 srcDepthStride);

    size_t dstDepth    = 1;
    size_t dstBatches  = 1;
    size_t dstHeight   = 1;
    size_t dstWidth    = 1;
    size_t dstChannels = 1;
    AssignValues(dstShape.GetNumDimensions(),
                 0,
                 dstShape,
                 dstChannels,
                 dstWidth,
                 dstHeight,
                 dstBatches,
                 dstDepth);

    size_t dstDepthStride   = 0;
    size_t dstBatchStride   = 0;
    size_t dstHeightStride  = 0;
    size_t dstWidthStride   = 0;
    size_t dstChannelStride = 0;
    AssignValues(dstStrides.GetNumDimensions(),
                 0,
                 dstStrides,
                 dstChannelStride,
                 dstWidthStride,
                 dstHeightStride,
                 dstBatchStride,
                 dstDepthStride);

    const unsigned char* srcDataStart;
    unsigned char* dstDataStart;
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
        srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
        dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
    }
    if (srcDataStart == nullptr)
    {
        throw MemoryValidationException("The source tensor is null.");
    }
    if (dstDataStart == nullptr)
    {
        throw MemoryValidationException("The destination tensor is null.");
    }

    size_t copyLength  = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
    size_t copyWidth   = std::min(srcWidth, dstWidth);
    size_t copyHeight  = std::min(srcHeight, dstHeight);
    size_t copyBatches = std::min(srcBatches, dstBatches);
    size_t copyDepth   = std::min(srcDepth, dstDepth);

    // Coalesce inner dimensions where possible
    // to reduce the overhead of calling copy() and to
    // allow for memory bandwidth optimisations.
    if (copyLength == srcWidthStride &&
        copyLength == dstWidthStride)
    {
        // There is no special padding between rows,
        // and sizes are compatible, so copy whole rows.
        copyLength *= copyWidth;
        copyWidth = 1;

        if (copyLength == srcHeightStride &&
            copyLength == dstHeightStride)
        {
            // There is no special padding between batches,
            // and sizes are compatible, so copy whole batches.
            copyLength *= copyHeight;
            copyHeight = 1;
        }
    }
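
    // Walk the copy region outermost dimension first. After each inner loop,
    // the pointers are rewound to the start of the slice just processed and
    // advanced by the enclosing stride, which skips any padding between
    // rows, batches and depth slices.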
    const unsigned char* srcData = srcDataStart;
    unsigned char* dstData = dstDataStart;
    for (unsigned int d = 0; d < copyDepth; ++d)
    {
        auto srcPtrDepth = srcData;
        auto dstPtrDepth = dstData;
        for (unsigned int b = 0; b < copyBatches; ++b)
        {
            auto srcPtrBatch = srcData;
            auto dstPtrBatch = dstData;
            for (unsigned int h = 0; h < copyHeight; ++h)
            {
                auto srcPtrChannel = srcData;
                auto dstPtrChannel = dstData;
                for (unsigned int w = 0; w < copyWidth; ++w)
                {
                    // Sanity check the memory area we've been asked to copy from and to.
                    if (copyLength > srcSize)
                    {
                        throw MemoryValidationException(
                            "The source tensor size does not match the size of the allocated tensor.");
                    }
                    if (copyLength > dstSize)
                    {
                        throw MemoryValidationException(
                            "The destination tensor size will overrun the destination tensor.");
                    }
                    copy(dstData, srcData, copyLength);
                    dstData += dstWidthStride;
                    srcData += srcWidthStride;
                }
                dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
                srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
            }
            dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
            srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
        }
        dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
        srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
    }

    srcTensor->Unmap();
    dstTensor->Unmap();
}

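/// Pairs up the i-th input and i-th output tensor handles of a workload
/// descriptor, downcasting each to the backend-specific handle type named
/// by the template arguments.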
template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
void GatherTensorHandlePairs(const DescriptorType& descriptor,
                             std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
{
    const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
    tensorHandlePairs.reserve(numInputs);

    for (unsigned int i = 0; i < numInputs; ++i)
    {
        SrcTensorHandleType* const srcTensorHandle =
            PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
        DstTensorHandleType* const dstTensorHandle =
            PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);

        tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
    }
}

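/// Reverses the dimension-bit order of a strided-slice mask; ArmNN and the
/// Compute Library number tensor dimensions in opposite directions.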
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);

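/// Copies the data from 'tensor' into 'permuteBuffer', rearranged according
/// to 'permutationVector', and returns a ConstTensor describing that buffer.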
armnn::ConstTensor PermuteTensor(const ConstTensorHandle* tensor,
                                 const PermutationVector& permutationVector,
                                 void* permuteBuffer);

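/// Reshapes 'weightInfo' in place to the weight shape the Compute Library
/// expects for the given data layout.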
void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);

/// Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M].
/// This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at
/// [1,H,W,I*M] (if NHWC), as required by the compute library.
/// Returns a tuple of the converted weights TensorInfo and the depth multiplier.
std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo,
                                                                const TensorInfo& inputInfo,
                                                                const DataLayout dataLayout);
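// Illustrative example (not from this header): with I=4 input channels and a
// depth multiplier M=2, a [1,3,3,8] weights TensorInfo stays [1,3,3,8] for
// NHWC but becomes [1,8,3,3] for NCHW; the returned multiplier is 2 either way.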

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
                                                     DataLayout dataLayout,
                                                     void* permuteBuffer);

/// Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M].
/// This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or
/// keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.
///
/// \param weightTensor - ConstTensorHandle of the weights tensor
/// \param inputInfo - TensorInfo of the input tensor
/// \param dataLayout - DataLayout of the input tensor
/// \param permuteBuffer - Pointer to memory with the size of the tensor, used for the permutation
/// \return tuple of the transformed weights ConstTensor and the depthwise multiplier
std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor,
                                                             const TensorInfo& inputInfo,
                                                             const DataLayout dataLayout,
                                                             void* permuteBuffer);


/// Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].
///
/// \param weightTensor - ConstTensorHandle of the weights tensor to be converted
/// \param inputInfo - TensorInfo of the corresponding input tensor
/// \param dataLayout - DataLayout of the input tensor, e.g. NHWC or NCHW
/// \param permuteBuffer - Memory location with the same size as the weights tensor, to which the converted data is written
/// \return - A tuple of the converted weights ConstTensor and the depth multiplier
std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor,
                                                        const TensorInfo& inputInfo,
                                                        const DataLayout& dataLayout,
                                                        void* permuteBuffer);


/// Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1).
///
/// \param inputInfo0 - TensorInfo of the params tensor
/// \param inputInfo1 - TensorInfo of the indices tensor
/// \return - A map from the names N, ND, K, W and C to their values
std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1);

/// Generates a permutation vector of size rank whose only effect is to swap
/// the two rightmost dimensions.
///
/// \param rank - Tensor rank, i.e. the number of dimensions in the tensors
/// \return - A permutation vector that permutes the last two dimensions
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank);

} // namespace armnn