ArmNN
 20.02
WorkloadUtils.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 #include "CpuTensorHandle.hpp"
9 
11 
12 #include <armnn/Tensor.hpp>
13 
14 #include <armnnUtils/Permute.hpp>
15 
16 #include <Half.hpp>
17 #include <Profiling.hpp>
18 
19 #include <boost/cast.hpp>
20 
21 namespace armnn
22 {
23 namespace
24 {
25 
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    // Assigns one element of `array` to `arg`, consuming the array from the
    // back towards the front, and advances `idx` (passed by reference).
    // Once `idx` reaches `num` the call is a no-op, so surplus targets keep
    // their initial values.
    if (idx < num)
    {
        arg = array[num - 1 - idx];
        ++idx;
    }
}
37 
// Variadic driver: distributes the trailing elements of `array` across
// `assignee` and `args...` in declaration order (last array element to the
// first target, and so on).
//
// Note the subtlety: `idx` is taken BY VALUE here, but the single-target
// overload takes it by reference — so the first call below advances this
// function's local copy of `idx`, and the recursive call then continues
// from the next position. Callers can therefore pass a literal 0.
template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    // Consume one element for the first target (advances local `idx`).
    AssignValues(num, idx, array, assignee);

    // Recurse for the remaining targets; resolves to the single-target
    // overload when only one argument is left.
    AssignValues(num, idx, array, args...);
}
45 
46 } // anonymous namespace
47 
// Copies the contents of srcTensor into dstTensor, honouring the (possibly
// padded) byte strides of both tensors. `copy` is invoked as
// copy(dstPtr, srcPtr, lengthInBytes) for each contiguous run — e.g. a
// memcpy-like functor. Overlapping regions are copied per the min of each
// dimension, so a smaller destination truncates and a larger one is only
// partially written.
template <typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
{
    // For ease of understanding, names are assigned to the dimensions
    // of the tensor as if NHWC, however this routine works with any 5D tensor
    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

    TensorShape srcStrides = srcTensor->GetStrides();
    const TensorShape& srcShape = srcTensor->GetShape();
    // Stride of the outermost dimension times its extent = total byte span
    // of the buffer (strides are in bytes: the pointers below are raw
    // unsigned char* advanced directly by stride values).
    const auto srcSize = srcTensor->GetStrides()[0] * srcShape[0];
    IgnoreUnused(srcSize); // Only used for asserts
    TensorShape dstStrides = dstTensor->GetStrides();
    const TensorShape& dstShape = dstTensor->GetShape();
    const auto dstSize = dstTensor->GetStrides()[0] * dstShape[0];
    IgnoreUnused(dstSize); // Only used for asserts

    // Unpack up to 5 source dimensions. AssignValues fills from the LAST
    // array element backwards (innermost dimension first), so when the tensor
    // has fewer than 5 dimensions the missing outer ones keep their default 1.
    size_t srcDepth = 1;
    size_t srcBatches = 1;
    size_t srcHeight = 1;
    size_t srcWidth = 1;
    size_t srcChannels = 1;
    AssignValues(srcShape.GetNumDimensions(),
                 0,
                 srcShape,
                 srcChannels,
                 srcWidth,
                 srcHeight,
                 srcBatches,
                 srcDepth);

    // Same unpacking for the source strides; missing outer strides stay 0.
    size_t srcDepthStride = 0;
    size_t srcBatchStride = 0;
    size_t srcHeightStride = 0;
    size_t srcWidthStride = 0;
    size_t srcChannelStride = 0;
    AssignValues(srcStrides.GetNumDimensions(),
                 0,
                 srcStrides,
                 srcChannelStride,
                 srcWidthStride,
                 srcHeightStride,
                 srcBatchStride,
                 srcDepthStride);

    // Destination dimensions, unpacked the same way.
    size_t dstDepth = 1;
    size_t dstBatches = 1;
    size_t dstHeight = 1;
    size_t dstWidth = 1;
    size_t dstChannels = 1;
    AssignValues(dstShape.GetNumDimensions(),
                 0,
                 dstShape,
                 dstChannels,
                 dstWidth,
                 dstHeight,
                 dstBatches,
                 dstDepth);

    // Destination strides, unpacked the same way.
    size_t dstDepthStride = 0;
    size_t dstBatchStride = 0;
    size_t dstHeightStride = 0;
    size_t dstWidthStride = 0;
    size_t dstChannelStride = 0;
    AssignValues(dstStrides.GetNumDimensions(),
                 0,
                 dstStrides,
                 dstChannelStride,
                 dstWidthStride,
                 dstHeightStride,
                 dstBatchStride,
                 dstDepthStride);

    const unsigned char* srcDataStart;
    unsigned char* dstDataStart;
    {
        // Map may block until the backing buffers are ready for CPU access.
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
        srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
        dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
    }

    // Bytes copied per copy() call: one full channel run, bounded by the
    // smaller of the two tensors. The remaining copy* values are iteration
    // counts per dimension, likewise clamped to the common extent.
    size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
    size_t copyWidth = std::min(srcWidth, dstWidth);
    size_t copyHeight = std::min(srcHeight, dstHeight);
    size_t copyBatches = std::min(srcBatches, dstBatches);
    size_t copyDepth = std::min(srcDepth, dstDepth);

    // Coalesce inner dimensions where possible
    // to reduce overhead calling copy() and to
    // allow for memory bandwidth optimisations
    if (copyLength == srcWidthStride &&
        copyLength == dstWidthStride)
    {
        // There is no special padding between rows,
        // and sizes are compatible, so copy whole rows
        copyLength *= copyWidth;
        copyWidth = 1;

        if (copyLength == srcHeightStride &&
            copyLength == dstHeightStride)
        {
            // There is no special padding between batches
            // and sizes are compatible so copy whole batches
            copyLength *= copyHeight;
            copyHeight = 1;
        }
    }

    const unsigned char* srcData = srcDataStart;
    unsigned char* dstData = dstDataStart;
    for (unsigned int d = 0; d < copyDepth; ++d)
    {
        // Remember where this depth slice started so the pointer can be
        // rewound to slice start + full stride after the inner loops.
        auto srcPtrDepth = srcData;
        auto dstPtrDepth = dstData;
        for (unsigned int b = 0; b < copyBatches; ++b)
        {
            auto srcPtrBatch = srcData;
            auto dstPtrBatch = dstData;
            for (unsigned int h = 0; h < copyHeight; ++h)
            {
                auto srcPtrChannel = srcData;
                auto dstPtrChannel = dstData;
                for (unsigned int w = 0; w < copyWidth; ++w)
                {
                    BOOST_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
                    BOOST_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
                    copy(dstData, srcData, copyLength);
                    dstData += dstWidthStride;
                    srcData += srcWidthStride;
                }
                // Advance by (stride - distance already walked): lands exactly
                // one height-stride past the row start, skipping any padding.
                dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
                srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
            }
            // Same rewind trick for the batch dimension.
            dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
            srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
        }
        // ...and for the depth dimension.
        dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
        srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
    }

    srcTensor->Unmap();
    dstTensor->Unmap();
}
190 
191 template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
192 void GatherTensorHandlePairs(const DescriptorType& descriptor,
193  std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
194 {
195  const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
196  tensorHandlePairs.reserve(numInputs);
197 
198  for (unsigned int i = 0; i < numInputs; ++i)
199  {
200  SrcTensorHandleType* const srcTensorHandle =
201  boost::polymorphic_downcast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
202  DstTensorHandleType* const dstTensorHandle =
203  boost::polymorphic_downcast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
204 
205  tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
206  }
207 }
208 
209 int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);
210 
212  const PermutationVector& permutationVector,
213  void* permuteBuffer);
214 
215 void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);
216 
218 
220  DataLayout dataLayout,
221  void* permuteBuffer);
222 
223 } //namespace armnn
DataLayout
Definition: Types.hpp:49
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle *weightTensor, DataLayout dataLayout, void *permuteBuffer)
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo &weightInfo, DataLayout dataLayout)
virtual TensorShape GetStrides() const =0
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
Copyright (c) 2020 ARM Limited.
void IgnoreUnused(Ts &&...)
armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:169
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:199
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
virtual void Unmap() const =0
Unmap the tensor data.
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim)
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:43
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
void GatherTensorHandlePairs(const DescriptorType &descriptor, std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType *>> &tensorHandlePairs)
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:18