From dba634fd6a66a9e033a1925b0b26c80b270bbf21 Mon Sep 17 00:00:00 2001
From: Matthew Jackson <matthew.jackson@arm.com>
Date: Thu, 15 Aug 2019 15:14:18 +0100
Subject: IVGCVSW-3639 Add 5d tensor support

* Increased MaxNumOfTensorDimensions and fixed issues related to its use
* Fixed issues caused by assuming 5d tensors are invalid
* Updated ArmComputeTensorUtils for 5d tensors
* Added 5d tensor unit tests for add, mul, stack and reshape (needed by IVGCVSW-3527)

Signed-off-by: Matthew Jackson <matthew.jackson@arm.com>
Change-Id: I5bcd64942d0d04efcc6c5acb240ad4b88e010743
---
 src/backends/aclCommon/ArmComputeTensorUtils.hpp | 52 +++++++++++++++---------
 1 file changed, 33 insertions(+), 19 deletions(-)

(limited to 'src/backends/aclCommon/ArmComputeTensorUtils.hpp')

diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index c9587a7990..34565fe425 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -108,12 +108,14 @@ void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
 
 // Helper function to obtain byte offset into tensor data
 inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info,
+                              uint32_t depthIndex,
                               uint32_t batchIndex,
                               uint32_t channelIndex,
                               uint32_t y,
                               uint32_t x)
 {
     arm_compute::Coordinates coords;
+    coords.set(4, static_cast<int>(depthIndex));
     coords.set(3, static_cast<int>(batchIndex));
     coords.set(2, static_cast<int>(channelIndex));
     coords.set(1, static_cast<int>(y));
@@ -123,6 +125,7 @@ inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info,
 
 // Helper function to obtain element offset into data buffer representing tensor data (assuming no strides).
 inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info,
+                                    uint32_t depthIndex,
                                     uint32_t batchIndex,
                                     uint32_t channelIndex,
                                     uint32_t y,
@@ -132,14 +135,15 @@ inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info,
     uint32_t width = static_cast<uint32_t>(shape[0]);
     uint32_t height = static_cast<uint32_t>(shape[1]);
     uint32_t numChannels = static_cast<uint32_t>(shape[2]);
-    return ((batchIndex * numChannels + channelIndex) * height + y) * width + x;
+    uint32_t numBatches = static_cast<uint32_t>(shape[3]);
+    return (((depthIndex * numBatches + batchIndex) * numChannels + channelIndex) * height + y) * width + x;
 }
 
 template <typename T>
 void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData)
 {
     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
-    static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
+    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
     {
         const arm_compute::ITensorInfo& info = *srcTensor.info();
         const arm_compute::TensorShape& shape = info.tensor_shape();
@@ -148,18 +152,23 @@ void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData
         uint32_t height = static_cast<uint32_t>(shape[1]);
         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
+        uint32_t depth = static_cast<uint32_t>(shape[4]);
 
-        for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
+        for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
         {
-            for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
+            for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
             {
-                for (unsigned int y = 0; y < height; ++y)
+                for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
                 {
-                    // Copies one row from arm_compute tensor buffer to linear memory buffer.
-                    // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
-                    memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
-                           bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
-                           width * sizeof(T));
+                    for (unsigned int y = 0; y < height; ++y)
+                    {
+                        // Copies one row from arm_compute tensor buffer to linear memory buffer.
+                        // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
+                        memcpy(
+                            dstData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                            bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                            width * sizeof(T));
+                    }
                 }
             }
         }
@@ -170,7 +179,7 @@ template <typename T>
 void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor)
 {
     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
-    static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
+    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
     {
         const arm_compute::ITensorInfo& info = *dstTensor.info();
         const arm_compute::TensorShape& shape = info.tensor_shape();
@@ -179,18 +188,23 @@ void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor
         uint32_t height = static_cast<uint32_t>(shape[1]);
         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
+        uint32_t depth = static_cast<uint32_t>(shape[4]);
 
-        for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
+        for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
         {
-            for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
+            for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
             {
-                for (unsigned int y = 0; y < height; ++y)
+                for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
                 {
-                    // Copies one row from linear memory buffer to arm_compute tensor buffer.
-                    // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
-                    memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
-                           srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
-                           width * sizeof(T));
+                    for (unsigned int y = 0; y < height; ++y)
+                    {
+                        // Copies one row from linear memory buffer to arm_compute tensor buffer.
+                        // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
+                        memcpy(
+                            bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                            srcData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                            width * sizeof(T));
+                    }
                 }
             }
         }
--
cgit v1.2.1