ArmNN 22.05
WorkloadUtils.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/backends/ITensorHandle.hpp>
#include <armnn/backends/TensorHandle.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/Permute.hpp>

#include <Half.hpp>
#include <Profiling.hpp>

namespace armnn
{
namespace
{

// Recursively unpacks the values of `array` back-to-front into the trailing
// assignee arguments: the first assignee receives array[num - 1], the next
// array[num - 2], and so on. Assignees beyond the number of available
// elements keep their initial values.
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }

    arg = array[(num - 1) - idx];
    idx++;
}

template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee);

    AssignValues(num, idx, array, args...);
}

} // anonymous namespace
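The AssignValues helpers above unpack a shape or stride array into a named argument list, fastest-varying dimension first. A minimal sketch (not part of the header), assuming a 4D NHWC shape:

// Sketch only: unpacking a 4D NHWC shape with the helpers above.
// The fastest-varying (last) dimension lands in the first assignee,
// and assignees left over keep their initial values.
armnn::TensorShape shape({ 2, 32, 32, 3 });    // N, H, W, C
size_t channels = 1, width = 1, height = 1, batches = 1, depth = 1;
AssignValues(shape.GetNumDimensions(), 0, shape,
             channels,   // = shape[3] = 3
             width,      // = shape[2] = 32
             height,     // = shape[1] = 32
             batches,    // = shape[0] = 2
             depth);     // untouched: only four dimensions available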
template <typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
{
    // For ease of understanding, names are assigned to the dimensions
    // of the tensor as if NHWC; however, this routine works with any 5D tensor.
    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

    TensorShape srcStrides = srcTensor->GetStrides();
    const TensorShape& srcShape = srcTensor->GetShape();
    const auto srcSize = srcTensor->GetStrides()[0] * srcShape[0];
    IgnoreUnused(srcSize); // Only used for asserts
    TensorShape dstStrides = dstTensor->GetStrides();
    const TensorShape& dstShape = dstTensor->GetShape();
    const auto dstSize = dstTensor->GetStrides()[0] * dstShape[0];
    IgnoreUnused(dstSize); // Only used for asserts

    size_t srcDepth = 1;
    size_t srcBatches = 1;
    size_t srcHeight = 1;
    size_t srcWidth = 1;
    size_t srcChannels = 1;
    AssignValues(srcShape.GetNumDimensions(),
                 0,
                 srcShape,
                 srcChannels,
                 srcWidth,
                 srcHeight,
                 srcBatches,
                 srcDepth);

    size_t srcDepthStride = 0;
    size_t srcBatchStride = 0;
    size_t srcHeightStride = 0;
    size_t srcWidthStride = 0;
    size_t srcChannelStride = 0;
    AssignValues(srcStrides.GetNumDimensions(),
                 0,
                 srcStrides,
                 srcChannelStride,
                 srcWidthStride,
                 srcHeightStride,
                 srcBatchStride,
                 srcDepthStride);

    size_t dstDepth = 1;
    size_t dstBatches = 1;
    size_t dstHeight = 1;
    size_t dstWidth = 1;
    size_t dstChannels = 1;
    AssignValues(dstShape.GetNumDimensions(),
                 0,
                 dstShape,
                 dstChannels,
                 dstWidth,
                 dstHeight,
                 dstBatches,
                 dstDepth);

    size_t dstDepthStride = 0;
    size_t dstBatchStride = 0;
    size_t dstHeightStride = 0;
    size_t dstWidthStride = 0;
    size_t dstChannelStride = 0;
    AssignValues(dstStrides.GetNumDimensions(),
                 0,
                 dstStrides,
                 dstChannelStride,
                 dstWidthStride,
                 dstHeightStride,
                 dstBatchStride,
                 dstDepthStride);

    const unsigned char* srcDataStart;
    unsigned char* dstDataStart;
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
        srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
        dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
    }

    size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
    size_t copyWidth = std::min(srcWidth, dstWidth);
    size_t copyHeight = std::min(srcHeight, dstHeight);
    size_t copyBatches = std::min(srcBatches, dstBatches);
    size_t copyDepth = std::min(srcDepth, dstDepth);

    // Coalesce inner dimensions where possible
    // to reduce the overhead of calling copy() and to
    // allow for memory bandwidth optimisations.
    if (copyLength == srcWidthStride &&
        copyLength == dstWidthStride)
    {
        // There is no special padding between rows,
        // and sizes are compatible, so copy whole rows.
        copyLength *= copyWidth;
        copyWidth = 1;

        if (copyLength == srcHeightStride &&
            copyLength == dstHeightStride)
        {
            // There is no special padding between the rows of a plane either,
            // and sizes are compatible, so copy whole planes.
            copyLength *= copyHeight;
            copyHeight = 1;
        }
    }

    const unsigned char* srcData = srcDataStart;
    unsigned char* dstData = dstDataStart;
    for (unsigned int d = 0; d < copyDepth; ++d)
    {
        auto srcPtrDepth = srcData;
        auto dstPtrDepth = dstData;
        for (unsigned int b = 0; b < copyBatches; ++b)
        {
            auto srcPtrBatch = srcData;
            auto dstPtrBatch = dstData;
            for (unsigned int h = 0; h < copyHeight; ++h)
            {
                auto srcPtrChannel = srcData;
                auto dstPtrChannel = dstData;
                for (unsigned int w = 0; w < copyWidth; ++w)
                {
                    ARMNN_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
                    ARMNN_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
                    copy(dstData, srcData, copyLength);
                    dstData += dstWidthStride;
                    srcData += srcWidthStride;
                }
                // Rewind the inner-loop advance, then step by one full stride.
                dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
                srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
            }
            dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
            srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
        }
        dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
        srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
    }

    srcTensor->Unmap();
    dstTensor->Unmap();
}
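CopyTensorContentsGeneric only needs a callable with a memcpy-style (dst, src, size) signature; the strided traversal above handles any padding. A minimal sketch of how a MemCopy-style workload might drive it, assuming srcHandle and dstHandle are valid, already-allocated handles:

#include <cstring>

// Sketch only: copy one tensor into another using plain memcpy.
void CopyWithMemcpy(const armnn::ITensorHandle* srcHandle, armnn::ITensorHandle* dstHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };
    armnn::CopyTensorContentsGeneric(srcHandle, dstHandle, copyFunc);
}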
template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
void GatherTensorHandlePairs(const DescriptorType& descriptor,
                             std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
{
    const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
    tensorHandlePairs.reserve(numInputs);

    for (unsigned int i = 0; i < numInputs; ++i)
    {
        SrcTensorHandleType* const srcTensorHandle =
            PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
        DstTensorHandleType* const dstTensorHandle =
            PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);

        tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
    }
}
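GatherTensorHandlePairs assumes the descriptor's m_Inputs and m_Outputs vectors run in parallel, and PolymorphicDowncast asserts in debug builds that each handle really has the requested type. A hedged sketch, pairing the handles of a MemCopy queue descriptor at the ITensorHandle level (MemCopyQueueDescriptor is assumed to come from armnn/backends/WorkloadData.hpp):

#include <armnn/backends/WorkloadData.hpp>

// Sketch only: pair up the inputs and outputs of a MemCopyQueueDescriptor.
void PairMemCopyHandles(const armnn::MemCopyQueueDescriptor& descriptor)
{
    std::vector<std::pair<armnn::ITensorHandle*, armnn::ITensorHandle*>> pairs;
    armnn::GatherTensorHandlePairs(descriptor, pairs);
    // pairs[i] now holds { descriptor.m_Inputs[i], descriptor.m_Outputs[i] }.
}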

int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);

armnn::ConstTensor PermuteTensor(const ConstTensorHandle* tensor,
                                 const PermutationVector& permutationVector,
                                 void* permuteBuffer);

void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);

/// Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M].
/// This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M]
/// (if NHWC), as required by the compute library.
/// Returns a tuple of the converted weights TensorInfo and the depth multiplier.
std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo,
                                                                const TensorInfo& inputInfo,
                                                                const DataLayout dataLayout);
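A worked example of the conversion above, with assumed values: I = 16 input channels and depth multiplier M = 2, so the weights are stored as [1,H,W,I*M] = [1,3,3,32].

// Sketch only: NCHW input, so the weight TensorInfo is permuted.
armnn::TensorInfo weightInfo(armnn::TensorShape({ 1, 3, 3, 32 }), armnn::DataType::Float32);
armnn::TensorInfo inputInfo(armnn::TensorShape({ 1, 16, 5, 5 }), armnn::DataType::Float32);
std::tuple<armnn::TensorInfo, unsigned int> result =
    armnn::Convert1HWOTensorInfoToAcl(weightInfo, inputInfo, armnn::DataLayout::NCHW);
// std::get<0>(result).GetShape() == [ 1, 32, 3, 3 ]  (would stay [1, 3, 3, 32] for NHWC)
// std::get<1>(result) == 2                           (depth multiplier M = (I*M) / I)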

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
                                                     DataLayout dataLayout,
                                                     void* permuteBuffer);

/// Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M].
/// This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or
/// keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.
///
/// \param weightTensor - ConstTensorHandle of the weights tensor
/// \param inputInfo - TensorInfo of the input tensor
/// \param dataLayout - DataLayout of the input tensor
/// \param permuteBuffer - Pointer to memory with the size of the tensor. Used for the permutation
/// \return tuple of the transformed weights ConstTensor and the depth multiplier
std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor,
                                                             const TensorInfo& inputInfo,
                                                             const DataLayout dataLayout,
                                                             void* permuteBuffer);
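The caller owns permuteBuffer; the doc comment above only requires it to be at least as large as the weight tensor. A minimal sketch of a hypothetical wrapper (ConvertDepthwiseWeights is not part of ArmNN):

// Sketch only: allocate scratch memory, then convert depthwise weights.
std::tuple<armnn::ConstTensor, unsigned int>
ConvertDepthwiseWeights(const armnn::ConstTensorHandle* weights,
                        const armnn::TensorInfo& inputInfo,
                        std::vector<uint8_t>& permuteBuffer)
{
    // The scratch buffer must be at least the size of the weight tensor.
    permuteBuffer.resize(weights->GetTensorInfo().GetNumBytes());
    return armnn::Convert1HWOTensorToAcl(weights, inputInfo,
                                         armnn::DataLayout::NCHW,
                                         permuteBuffer.data());
}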

/// Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].
///
/// \param weightTensor - ConstTensorHandle of the weight tensor that should be converted
/// \param inputInfo - TensorInfo of the corresponding input tensor
/// \param dataLayout - DataLayout of the input tensor, e.g. NHWC or NCHW
/// \param permuteBuffer - Memory location with the same size as the weight tensor to write the converted data to
/// \return - A tuple of ConstTensor and unsigned int, which are the converted weightTensor and the depthMultiplier
std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor,
                                                        const TensorInfo& inputInfo,
                                                        const DataLayout& dataLayout,
                                                        void* permuteBuffer);
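Continuing the same assumed depthwise example for the MIHW variant:

// Worked shapes only (assumed values): with I = 16, M = 2 and 3x3 kernels,
// weights stored as [1, H, W, I*M] = [1, 3, 3, 32] convert to
// [M, I, H, W] = [2, 16, 3, 3], and the returned depth multiplier is 2.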

/// Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1).
///
/// \param inputInfo0 - TensorInfo of the corresponding input tensor: params
/// \param inputInfo1 - TensorInfo of the corresponding input tensor: indices
/// \return - A map with names and values for N, ND, K, W, C
std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1);

} // namespace armnn
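For CalculateGatherNdKeyIndices, a worked example; the parenthetical meanings below are assumptions about the keys, not taken from the header, and should be checked against the implementation:

// Sketch only: params of shape [5, 4, 3] gathered with indices of shape [2, 2].
armnn::TensorInfo params(armnn::TensorShape({ 5, 4, 3 }), armnn::DataType::Float32);
armnn::TensorInfo indices(armnn::TensorShape({ 2, 2 }), armnn::DataType::Signed32);
std::map<std::string, unsigned int> keys = armnn::CalculateGatherNdKeyIndices(params, indices);
// keys["N"]  == 1   (always 1)
// keys["ND"] == 2   (assumed: last dimension of the indices shape)
// keys["W"]  == 2   (assumed: number of index tuples, product of the leading indices dims)
// keys["K"]  == 20  (assumed: flattened range of each index tuple, 5 * 4)
// keys["C"]  == 3   (assumed: size of each gathered slice, product of the remaining params dims)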