From c577f2c6a3b4ddb6ba87a882723c53a248afbeba Mon Sep 17 00:00:00 2001 From: telsoa01 <telmo.soares@arm.com> Date: Fri, 31 Aug 2018 09:22:23 +0100 Subject: Release 18.08 --- src/armnn/backends/ArmComputeTensorUtils.hpp | 97 +++++++++++++++++++--------- 1 file changed, 68 insertions(+), 29 deletions(-) (limited to 'src/armnn/backends/ArmComputeTensorUtils.hpp') diff --git a/src/armnn/backends/ArmComputeTensorUtils.hpp b/src/armnn/backends/ArmComputeTensorUtils.hpp index 84547f9c80..81c6620a01 100644 --- a/src/armnn/backends/ArmComputeTensorUtils.hpp +++ b/src/armnn/backends/ArmComputeTensorUtils.hpp @@ -20,26 +20,26 @@ class ITensorHandle; namespace armcomputetensorutils { -/// Utility function to map an armnn::DataType to corresponding arm_compute::DataType +/// Utility function to map an armnn::DataType to corresponding arm_compute::DataType. arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType); -/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape +/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape. arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape); /// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given -/// armnn::ITensorInfo +/// armnn::ITensorInfo. arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo); -/// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor +/// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor. arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor); -/// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor +/// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor.
arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc); -/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector +/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector. arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector); -/// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor +/// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor. template <typename Descriptor> arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor) { @@ -65,6 +65,16 @@ void InitialiseArmComputeTensorEmpty(Tensor& tensor) tensor.allocator()->allocate(); } +/// Utility function to free unused tensors after a workload is configured and prepared +template <typename Tensor> +void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor) +{ + if (tensor && !tensor->is_used()) + { + tensor.reset(nullptr); + } +} + // Helper function to obtain byte offset into tensor data inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info, uint32_t batchIndex, @@ -73,14 +83,14 @@ inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info, uint32_t x) { arm_compute::Coordinates coords; - coords.set(3, boost::numeric_cast<int>(batchIndex)); - coords.set(2, boost::numeric_cast<int>(channelIndex)); - coords.set(1, boost::numeric_cast<int>(y)); - coords.set(0, boost::numeric_cast<int>(x)); + coords.set(3, static_cast<int>(batchIndex)); + coords.set(2, static_cast<int>(channelIndex)); + coords.set(1, static_cast<int>(y)); + coords.set(0, static_cast<int>(x)); return info.offset_element_in_bytes(coords); } -// Helper function to obtain element offset into data buffer representing tensor data (assuming no strides) +// Helper function to obtain element offset into data buffer representing tensor data (assuming no strides).
inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info, uint32_t batchIndex, uint32_t channelIndex, @@ -88,25 +98,25 @@ inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info, uint32_t x) { const arm_compute::TensorShape& shape = info.tensor_shape(); - uint32_t width = boost::numeric_cast<uint32_t>(shape[0]); - uint32_t height = boost::numeric_cast<uint32_t>(shape[1]); - uint32_t numChannels = boost::numeric_cast<uint32_t>(shape[2]); + uint32_t width = static_cast<uint32_t>(shape[0]); + uint32_t height = static_cast<uint32_t>(shape[1]); + uint32_t numChannels = static_cast<uint32_t>(shape[2]); return ((batchIndex * numChannels + channelIndex) * height + y) * width + x; } template <typename T> void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData) { - // if MaxNumOfTensorDimensions is increased, this loop will need fixing + // If MaxNumOfTensorDimensions is increased, this loop will need fixing. static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); { const arm_compute::ITensorInfo& info = *srcTensor.info(); const arm_compute::TensorShape& shape = info.tensor_shape(); const uint8_t* const bufferPtr = srcTensor.buffer(); - uint32_t width = boost::numeric_cast<uint32_t>(shape[0]); - uint32_t height = boost::numeric_cast<uint32_t>(shape[1]); - uint32_t numChannels = boost::numeric_cast<uint32_t>(shape[2]); - uint32_t numBatches = boost::numeric_cast<uint32_t>(shape[3]); + uint32_t width = static_cast<uint32_t>(shape[0]); + uint32_t height = static_cast<uint32_t>(shape[1]); + uint32_t numChannels = static_cast<uint32_t>(shape[2]); + uint32_t numBatches = static_cast<uint32_t>(shape[3]); for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) { @@ -114,8 +124,8 @@ void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData { for (unsigned int y = 0; y < height; ++y) { - // Copy one row from arm_compute tensor buffer to linear memory buffer - // A row is the largest contiguous region we can copy, as the tensor data may be using strides + // Copies one row from arm_compute tensor buffer to
linear memory buffer. + // A row is the largest contiguous region we can copy, as the tensor data may be using strides. memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), width * sizeof(T)); @@ -128,16 +138,16 @@ void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData template <typename T> void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor) { - // if MaxNumOfTensorDimensions is increased, this loop will need fixing + // If MaxNumOfTensorDimensions is increased, this loop will need fixing. static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); { const arm_compute::ITensorInfo& info = *dstTensor.info(); const arm_compute::TensorShape& shape = info.tensor_shape(); uint8_t* const bufferPtr = dstTensor.buffer(); - uint32_t width = boost::numeric_cast<uint32_t>(shape[0]); - uint32_t height = boost::numeric_cast<uint32_t>(shape[1]); - uint32_t numChannels = boost::numeric_cast<uint32_t>(shape[2]); - uint32_t numBatches = boost::numeric_cast<uint32_t>(shape[3]); + uint32_t width = static_cast<uint32_t>(shape[0]); + uint32_t height = static_cast<uint32_t>(shape[1]); + uint32_t numChannels = static_cast<uint32_t>(shape[2]); + uint32_t numBatches = static_cast<uint32_t>(shape[3]); for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) { @@ -145,8 +155,8 @@ void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor { for (unsigned int y = 0; y < height; ++y) { - // Copy one row from linear memory buffer to arm_compute tensor buffer - // A row is the largest contiguous region we can copy, as the tensor data may be using strides + // Copies one row from linear memory buffer to arm_compute tensor buffer. + // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), width * sizeof(T)); @@ -156,5 +166,34 @@ void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor } } +/// Construct a TensorShape object from an ArmCompute object based on arm_compute::Dimensions. +/// \tparam ArmComputeType Any type that implements the Dimensions interface +/// \tparam T Shape value type +/// \param shapelike An ArmCompute object that implements the Dimensions interface +/// \param initial A default value to initialise the shape with +/// \return A TensorShape object filled from the Acl shapelike object. +template <typename ArmComputeType, typename T> +TensorShape GetTensorShape(const ArmComputeType& shapelike, T initial) +{ + std::vector<T> s(MaxNumOfTensorDimensions, initial); + for (unsigned int i=0; i < shapelike.num_dimensions(); ++i) + { + s[(shapelike.num_dimensions()-1)-i] = boost::numeric_cast<T>(shapelike[i]); + } + return TensorShape(boost::numeric_cast<unsigned int>(shapelike.num_dimensions()), s.data()); +}; + +/// Get the strides from an ACL strides object +inline TensorShape GetStrides(const arm_compute::Strides& strides) +{ + return GetTensorShape(strides, 0U); +} + +/// Get the shape from an ACL shape object +inline TensorShape GetShape(const arm_compute::TensorShape& shape) +{ + return GetTensorShape(shape, 1U); +} + } // namespace armcomputetensorutils } // namespace armnn -- cgit v1.2.1