From 53ef79504b4c881c572735393c2eede5fa556c46 Mon Sep 17 00:00:00 2001 From: Jan Eilers Date: Wed, 2 Jun 2021 12:01:25 +0100 Subject: IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M] * This change is necessary because tflite uses a [1,H,W,I*M] format and uses the I*M dimension for per axis quantization. Our previous layout [M,I,H,W] can't handle the correlating quantization scales. * Updates Onnx-, TfLiteParser and TfliteDelegate * Updates the CpuRef, CpuAcc and GpuAcc backends * Adjusts unit tests * Adds test to ensure models with old layout can still be read and executed * Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W] which can be used by backend developers !android-nn-driver:5553 Signed-off-by: Jan Eilers Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152 --- src/backends/reference/test/CMakeLists.txt | 2 + .../reference/test/RefPerAxisIteratorTests.cpp | 252 +++++++++++++++++++++ .../reference/test/RefPerChannelDecoderTests.cpp | 156 +++++++++++++ src/backends/reference/workloads/BaseIterator.hpp | 180 +++++++-------- src/backends/reference/workloads/ConvImpl.cpp | 31 ++- src/backends/reference/workloads/Decoders.hpp | 16 +- .../reference/workloads/TransposeConvolution2d.cpp | 2 +- 7 files changed, 508 insertions(+), 131 deletions(-) create mode 100644 src/backends/reference/test/RefPerAxisIteratorTests.cpp create mode 100644 src/backends/reference/test/RefPerChannelDecoderTests.cpp (limited to 'src/backends/reference') diff --git a/src/backends/reference/test/CMakeLists.txt b/src/backends/reference/test/CMakeLists.txt index 76541cfdaa..d7c5da896a 100644 --- a/src/backends/reference/test/CMakeLists.txt +++ b/src/backends/reference/test/CMakeLists.txt @@ -13,6 +13,8 @@ list(APPEND armnnRefBackendUnitTests_sources RefLayerTests.cpp RefMemoryManagerTests.cpp RefOptimizedNetworkTests.cpp + RefPerAxisIteratorTests.cpp + RefPerChannelDecoderTests.cpp RefRuntimeTests.cpp RefTensorHandleTests.cpp RefWorkloadFactoryHelper.hpp diff --git a/src/backends/reference/test/RefPerAxisIteratorTests.cpp b/src/backends/reference/test/RefPerAxisIteratorTests.cpp new file mode 100644 index 0000000000..7da4c0fb0f --- /dev/null +++ b/src/backends/reference/test/RefPerAxisIteratorTests.cpp @@ -0,0 +1,252 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include + +#include +#include + + +template +void CompareVector(std::vector vec1, std::vector vec2) +{ + BOOST_TEST(vec1.size() == vec2.size()); + + bool mismatch = false; + for (uint i = 0; i < vec1.size(); ++i) + { + if (vec1[i] != vec2[i]) + { + /*std::stringstream ss; + ss << "Vector value mismatch: index=" << i << " " << vec1[i] << "!=" << vec2[i];*/ + BOOST_TEST_MESSAGE(fmt::format("Vector value mismatch: index={} {} != {}", + i, + vec1[i], + vec2[i])); + mismatch = true; + } + } + + if (mismatch) + { + BOOST_FAIL("Error in CompareVector. 
Vectors don't match."); + } +} + +using namespace armnn; + +// Basically a per axis decoder but without any decoding/quantization +class MockPerAxisIterator : public PerAxisIterator> +{ +public: + MockPerAxisIterator(const int8_t* data, const armnn::TensorShape& tensorShape, const unsigned int axis) + : PerAxisIterator(data, tensorShape, axis), m_NumElements(tensorShape.GetNumElements()) + {} + + int8_t Get() const override + { + return *m_Iterator; + } + + virtual std::vector DecodeTensor(const TensorShape &tensorShape, + bool isDepthwise = false) override + { + IgnoreUnused(tensorShape, isDepthwise); + return std::vector{}; + }; + + // Iterates over data using operator[] and returns vector + std::vector Loop() + { + std::vector vec; + for (uint32_t i = 0; i < m_NumElements; ++i) + { + this->operator[](i); + vec.emplace_back(Get()); + } + return vec; + } + + unsigned int GetAxisIndex() + { + return m_AxisIndex; + } + unsigned int m_NumElements; +}; + +BOOST_AUTO_TEST_SUITE(RefPerAxisIterator) + +// Test Loop (Equivalent to DecodeTensor) and Axis = 0 +BOOST_AUTO_TEST_CASE(PerAxisIteratorTest1) +{ + std::vector input = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + TensorInfo tensorInfo ({3,1,2,2},DataType::QSymmS8); + + // test axis=0 + std::vector expOutput = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + auto iterator = MockPerAxisIterator(input.data(), tensorInfo.GetShape(), 0); + std::vector output = iterator.Loop(); + CompareVector(output, expOutput); + + // Set iterator to index and check if the axis index is correct + iterator[5]; + BOOST_TEST(iterator.GetAxisIndex() == 1u); + + iterator[1]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); + + iterator[10]; + BOOST_TEST(iterator.GetAxisIndex() == 2u); +} + +// Test Axis = 1 +BOOST_AUTO_TEST_CASE(PerAxisIteratorTest2) +{ + std::vector input = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + TensorInfo tensorInfo ({3,1,2,2},DataType::QSymmS8); + + // test axis=1 + std::vector expOutput = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + auto iterator = MockPerAxisIterator(input.data(), tensorInfo.GetShape(), 1); + std::vector output = iterator.Loop(); + CompareVector(output, expOutput); + + // Set iterator to index and check if the axis index is correct + iterator[5]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); + + iterator[1]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); + + iterator[10]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); +} + +// Test Axis = 2 +BOOST_AUTO_TEST_CASE(PerAxisIteratorTest3) +{ + std::vector input = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + TensorInfo tensorInfo ({3,1,2,2},DataType::QSymmS8); + + // test axis=2 + std::vector expOutput = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + auto iterator = MockPerAxisIterator(input.data(), tensorInfo.GetShape(), 2); + std::vector output = iterator.Loop(); + CompareVector(output, expOutput); + + // Set iterator to index and check if the axis index is correct + iterator[5]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); + + iterator[1]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); + + iterator[10]; + BOOST_TEST(iterator.GetAxisIndex() == 1u); +} + +// Test Axis = 3 +BOOST_AUTO_TEST_CASE(PerAxisIteratorTest4) +{ + std::vector input = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + TensorInfo tensorInfo ({3,1,2,2},DataType::QSymmS8); + + // test axis=3 + std::vector expOutput = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + auto iterator = MockPerAxisIterator(input.data(), tensorInfo.GetShape(), 3); + std::vector output = iterator.Loop(); + CompareVector(output, expOutput); + + // Set iterator to index and check if 
the axis index is correct + iterator[5]; + BOOST_TEST(iterator.GetAxisIndex() == 1u); + + iterator[1]; + BOOST_TEST(iterator.GetAxisIndex() == 1u); + + iterator[10]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); +} + + +// Test Axis = 1. Different tensor shape +BOOST_AUTO_TEST_CASE(PerAxisIteratorTest5) +{ + using namespace armnn; + std::vector input = + { + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15 + }; + + std::vector expOutput = + { + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15 + }; + + TensorInfo tensorInfo ({2,2,2,2},DataType::QSymmS8); + auto iterator = MockPerAxisIterator(input.data(), tensorInfo.GetShape(), 1); + std::vector output = iterator.Loop(); + CompareVector(output, expOutput); + + // Set iterator to index and check if the axis index is correct + iterator[5]; + BOOST_TEST(iterator.GetAxisIndex() == 1u); + + iterator[1]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); + + iterator[10]; + BOOST_TEST(iterator.GetAxisIndex() == 0u); +} + +// Test the increment and decrement operator +BOOST_AUTO_TEST_CASE(PerAxisIteratorTest7) +{ + using namespace armnn; + std::vector input = + { + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11 + }; + + std::vector expOutput = + { + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11 + }; + + TensorInfo tensorInfo ({3,1,2,2},DataType::QSymmS8); + auto iterator = MockPerAxisIterator(input.data(), tensorInfo.GetShape(), 2); + + iterator += 3; + BOOST_TEST(iterator.Get(), expOutput[3]); + BOOST_TEST(iterator.GetAxisIndex() == 1u); + + iterator += 3; + BOOST_TEST(iterator.Get(), expOutput[6]); + BOOST_TEST(iterator.GetAxisIndex() == 1u); + + iterator -= 2; + BOOST_TEST(iterator.Get(), expOutput[4]); + BOOST_TEST(iterator.GetAxisIndex() == 0u); + + iterator -= 1; + BOOST_TEST(iterator.Get(), expOutput[3]); + BOOST_TEST(iterator.GetAxisIndex() == 1u); +} + + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/backends/reference/test/RefPerChannelDecoderTests.cpp b/src/backends/reference/test/RefPerChannelDecoderTests.cpp new file mode 100644 index 0000000000..c2e3cee7a0 --- /dev/null +++ b/src/backends/reference/test/RefPerChannelDecoderTests.cpp @@ -0,0 +1,156 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(RefPerChannelDecoder) + +template +void CompareVector(std::vector vec1, std::vector vec2) +{ + BOOST_TEST(vec1.size() == vec2.size()); + + bool mismatch = false; + for (uint i = 0; i < vec1.size(); ++i) + { + if (vec1[i] != vec2[i]) + { + /*std::stringstream ss; + ss << "Vector value mismatch: index=" << i << " " << vec1[i] << "!=" << vec2[i];*/ + BOOST_TEST_MESSAGE(fmt::format("Vector value mismatch: index={} {} != {}", + i, + vec1[i], + vec2[i])); + mismatch = true; + } + } + + if (mismatch) + { + BOOST_FAIL("Error in CompareVector. 
Vectors don't match."); + } +} + +// Ensure quantization works for none depthwise convolutions +BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest1) +{ + using namespace armnn; + std::vector input = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 + }; + + std::vector expOutput = + { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, + 24.0f, 26.0f, 28.0f, 30.0f, 32.0f, 34.0f, 36.0f, 38.0f, 40.0f, 42.0f, 44.0f, 46.0f + }; + + TensorInfo tensorInfo ({2,2,2,3},DataType::QSymmS8,{1.0f, 2.0f},0); + auto decoder = MakeDecoder(tensorInfo, input.data()); + + std::vector output = decoder->DecodeTensor(tensorInfo.GetShape()); + + CompareVector(output, expOutput); +} + +// Ensure quantization works for depthwise convolutions M=1 +BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest2) +{ + using namespace armnn; + std::vector input = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + }; + + std::vector expOutput = + { + 0.0f, 1.0f, 2.0f, 3.0f, + 8.0f, 10.0f, 12.0f, 14.0f, + 24.0f, 27.0f, 30.0f, 33.0f, + 48.0f, 52.0f, 56.0f, 60.0f + }; + + // [O,1,H,W] = [I*M,1,H,W] = [4*1,1,2,2] + TensorInfo tensorInfo ({4,1,2,2},DataType::QSymmS8,{1.0f, 2.0f, 3.0f, 4.0f},0); + auto decoder = MakeDecoder(tensorInfo, input.data()); + + std::vector output = decoder->DecodeTensor(tensorInfo.GetShape(), true); + + CompareVector(output, expOutput); +} + +// Ensure quantization works for depthwise convolutions M=2 +BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest3) +{ + using namespace armnn; + std::vector input = + { + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15, + 16, 17, 18, 19, + 20, 21, 22, 23 + }; + + std::vector expOutput = + { + 0.0f, 1.0f, 2.0f, 3.0f, + 8.0f, 10.0f, 12.0f, 14.0f, + 24.0f, 27.0f, 30.0f, 33.0f, + 48.0f, 52.0f, 56.0f, 60.0f, + 80.0f, 85.0f, 90.0f, 95.0f, + 120.0f, 126.0f, 132.0f, 138.0f + }; + + // [O,1,H,W] = [I*M,1,H,W] = [3*2,1,2,2] + TensorInfo tensorInfo ({6,1,2,2},DataType::QSymmS8,{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f},0); + auto decoder = MakeDecoder(tensorInfo, input.data()); + + std::vector output = decoder->DecodeTensor(tensorInfo.GetShape(), true); + + CompareVector(output, expOutput); +} + +// Ensure quantization works for depthwise convolutions M=2 for int32 +BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest4) +{ + using namespace armnn; + std::vector input = + { + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15, + 16, 17, 18, 19, + 20, 21, 22, 23 + }; + + std::vector expOutput = + { + 0.0f, 1.0f, 2.0f, 3.0f, + 8.0f, 10.0f, 12.0f, 14.0f, + 24.0f, 27.0f, 30.0f, 33.0f, + 48.0f, 52.0f, 56.0f, 60.0f, + 80.0f, 85.0f, 90.0f, 95.0f, + 120.0f, 126.0f, 132.0f, 138.0f + }; + + // [O,1,H,W] = [I*M,1,H,W] = [3*2,1,2,2] + TensorInfo tensorInfo ({6,1,2,2},DataType::Signed32,{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f},0); + auto decoder = MakeDecoder(tensorInfo, input.data()); + + std::vector output = decoder->DecodeTensor(tensorInfo.GetShape(), true); + + CompareVector(output, expOutput); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp index 73e24691d9..483ef720f9 100644 --- a/src/backends/reference/workloads/BaseIterator.hpp +++ b/src/backends/reference/workloads/BaseIterator.hpp @@ -8,7 +8,9 @@ #include #include #include +#include #include +#include #include @@ -22,8 +24,6 @@ public: virtual ~BaseIterator() {} - virtual BaseIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) = 0; - virtual BaseIterator& 
operator++() = 0; virtual BaseIterator& operator+=(const unsigned int increment) = 0; @@ -47,7 +47,6 @@ public: virtual std::vector DecodeTensor(const TensorShape &tensorShape, - const unsigned int channelMultiplier = 1, bool isDepthwise = false) = 0; }; @@ -108,14 +107,6 @@ public: return *this; } - TypedIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) override - { - IgnoreUnused(axisIndex); - ARMNN_ASSERT(m_Iterator); - m_Iterator = m_Start + index; - return *this; - } - protected: T* m_Iterator; T* m_Start; @@ -135,10 +126,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -173,10 +163,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -211,10 +200,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -249,10 +237,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -289,10 +276,9 @@ public: return val; } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -328,10 +314,9 @@ public: return val; } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -365,10 +350,9 @@ public: return *m_Iterator; } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -393,10 +377,9 @@ public: return static_cast(*m_Iterator) * m_Scale; } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -430,10 +413,9 @@ public: return static_cast(*m_Iterator); } std::vector DecodeTensor (const TensorShape& 
tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -463,10 +445,9 @@ public: return *m_Iterator; } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -496,10 +477,9 @@ public: return *m_Iterator; } std::vector DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -530,10 +510,9 @@ public: } std::vector DecodeTensor(const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; @@ -769,23 +748,33 @@ public: } }; -// PerAxisIterator for per-axis quantization +/// PerAxisIterator for per-axis quantization. Iterates over a tensor as layed out in memory and keeps track +/// of the axis index. template class PerAxisIterator : public Base { public: - // axisFactor is used to calculate channelStep - PerAxisIterator(T* data = nullptr, unsigned int axisFactor = 0) - : m_Iterator(data), m_Start(data), m_AxisIndex(0), m_AxisFactor(axisFactor) + PerAxisIterator(T* data = nullptr, + unsigned int axisFactor = 0, + unsigned int axisDimensionality=0) + : m_Iterator(data), + m_Start(data), + m_AxisIndex(0), // iterates over the dimension of axis + m_AxisDimensionality(axisDimensionality), // tensorShape[quantization_dim] + m_AxisFactor(axisFactor), + m_Index(0) {} - // This should be called to set index for per-axis Encoder/Decoder - PerAxisIterator& SetIndex(unsigned int index, unsigned int axisIndex) override + PerAxisIterator(T* data = nullptr, + const armnn::TensorShape& tensorShape = TensorShape(), + const unsigned int axis = 0) + : m_Iterator(data), + m_Start(data), + m_AxisIndex(0), + m_Index(0) { - ARMNN_ASSERT(m_Iterator); - m_Iterator = m_Start + index; - m_AxisIndex = axisIndex; - return *this; + m_AxisDimensionality = tensorShape[axis]; + m_AxisFactor = armnnUtils::GetNumElementsAfter(tensorShape, axis); } void Reset(void* data) override @@ -793,37 +782,50 @@ public: m_Iterator = reinterpret_cast(data); m_Start = m_Iterator; m_AxisIndex = 0; + m_Index = 0; } PerAxisIterator& operator++() override { - ARMNN_ASSERT(m_Iterator); - ++m_Iterator; - m_AxisIndex = static_cast(*m_Iterator) % m_AxisFactor; + ++m_Index; + this -> operator[](m_Index); return *this; } PerAxisIterator& operator+=(const unsigned int increment) override { - ARMNN_ASSERT(m_Iterator); - m_Iterator += increment; - m_AxisIndex = static_cast(*m_Iterator) % m_AxisFactor; + m_Index += increment; + this -> operator[](m_Index); return *this; } PerAxisIterator& operator-=(const unsigned int decrement) override { - ARMNN_ASSERT(m_Iterator); - m_Iterator -= decrement; - m_AxisIndex = static_cast(*m_Iterator) % m_AxisFactor; + m_Index -= decrement; + this -> operator[](m_Index); return *this; } - PerAxisIterator& operator[](const unsigned int index) override + + inline 
PerAxisIterator& SetIndexOnMem(const unsigned int index) { ARMNN_ASSERT(m_Iterator); m_Iterator = m_Start + index; - m_AxisIndex = static_cast(*m_Iterator) % m_AxisFactor; + if (index < m_AxisFactor) + { + m_AxisIndex = 0; + } + else + { + m_AxisIndex = (index / m_AxisFactor) % m_AxisDimensionality; + } + m_Index = index; + return *this; + } + + PerAxisIterator& operator[](const unsigned int index) override + { + SetIndexOnMem(index); return *this; } @@ -831,18 +833,22 @@ public: T* m_Iterator; T* m_Start; unsigned int m_AxisIndex; + unsigned int m_AxisDimensionality; // tensorShape[quantization_dim] unsigned int m_AxisFactor; + unsigned int m_Index; }; class QSymm8PerAxisDecoder : public PerAxisIterator> { public: - QSymm8PerAxisDecoder(const int8_t* data, const std::vector& scale, unsigned int axisFactor) - : PerAxisIterator(data, axisFactor), m_Scales(scale) {} + QSymm8PerAxisDecoder(const int8_t* data, const armnn::TensorInfo& tensorInfo) + : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()), + m_Scales(tensorInfo.GetQuantizationScales()) + {} float Get() const override { - return armnn::Dequantize(*m_Iterator, m_Scales[m_AxisIndex], 0); + return armnn::Dequantize(*m_Iterator, GetScale(), 0); } // Get scale of the current value @@ -852,37 +858,18 @@ public: } std::vector DecodeTensor(const TensorShape &tensorShape, - const unsigned int channelMultiplier, bool isDepthwise) override { - const uint32_t size = tensorShape.GetNumElements(); - const uint32_t scaleSize = static_cast(m_Scales.size()); - - const uint32_t stepSize = isDepthwise ? - tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0]; - - const uint32_t stepNum = size / (stepSize * channelMultiplier); - uint32_t scale; + IgnoreUnused(isDepthwise); + const unsigned int size = tensorShape.GetNumElements(); std::vector decodedTensor; decodedTensor.reserve(size); - // channelMultiplier is only used in depthwise convolutions and in other cases will have no effect - // stepSize is the length of a contiguous area sharing a quantization scale within a tensor - // stepNum is the number of those steps/blocks in the tensor - for (uint32_t mult = 0; mult < channelMultiplier; ++mult) + for (uint32_t i = 0; i < size; ++i) { - for (uint32_t step = 0; step < stepNum; ++step) - { - scale = (channelMultiplier * step + mult) % scaleSize; - for (uint32_t i = 0; i < stepSize; ++i) - { - unsigned int index = mult * stepSize * channelMultiplier + - step * stepSize + i; - this->operator[](index); - decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0)); - } - } + SetIndexOnMem(i); + decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, GetScale(), 0)); } return decodedTensor; } @@ -920,8 +907,10 @@ private: class ScaledInt32PerAxisDecoder : public PerAxisIterator> { public: - ScaledInt32PerAxisDecoder(const int32_t* data, const std::vector& scales, unsigned int axisFactor) - : PerAxisIterator(data, axisFactor), m_Scales(scales) {} + ScaledInt32PerAxisDecoder(const int32_t* data, const armnn::TensorInfo tensorInfo) + : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()), + m_Scales(tensorInfo.GetQuantizationScales()) + {} float Get() const override { @@ -935,17 +924,14 @@ public: } std::vector DecodeTensor(const TensorShape &tensorShape, - const unsigned int channelMultiplier, bool isDepthwise) override { const uint32_t size = tensorShape.GetNumElements(); - const uint32_t scaleSize = static_cast(m_Scales.size()); const uint32_t 
stepSize = isDepthwise ? tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0]; - const uint32_t stepNum = size / (stepSize * channelMultiplier); - uint32_t scale; + const uint32_t stepNum = size / stepSize; std::vector decodedTensor; decodedTensor.reserve(size); @@ -953,18 +939,14 @@ public: // channelMultiplier is only used in depthwise convolutions and in other cases will have no effect // stepSize is the length of a contiguous area sharing a quantization scale within a tensor // stepNum is the number of those steps/blocks in the tensor - for (uint32_t mult = 0; mult < channelMultiplier; ++mult) + for (uint32_t step = 0; step < stepNum; ++step) { - for (uint32_t step = 0; step < stepNum; ++step) + //scale = (channelMultiplier * step + mult) % scaleSize; + for (uint32_t i = 0; i < stepSize; ++i) { - scale = (channelMultiplier * step + mult) % scaleSize; - for (uint32_t i = 0; i < stepSize; ++i) - { - unsigned int index = mult * stepSize * channelMultiplier + - step * stepSize + i; - this->operator[](index); - decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0)); - } + unsigned int index = step * stepSize + i; + this->operator[](index); + decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[step], 0)); } } return decodedTensor; diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp index d7845535df..e1bbc6bc52 100644 --- a/src/backends/reference/workloads/ConvImpl.cpp +++ b/src/backends/reference/workloads/ConvImpl.cpp @@ -95,9 +95,12 @@ void Convolve(const TensorShape& rInputShape, const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); - const unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1; - const unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex]; - const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0]; + // Weights layout: + // Conv2d: [O,H,W,I] + // Depthwise: [1,H,W,O] + const unsigned int inputChannels = rInputShape[channelsIndex]; + const unsigned int outputChannels = rOutputShape[channelsIndex]; + const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1; const unsigned int batchSize = rOutputShape[0]; const unsigned int outputHeight = rOutputShape[heightIndex]; @@ -105,16 +108,15 @@ void Convolve(const TensorShape& rInputShape, const unsigned int inputHeight = rInputShape[heightIndex]; const unsigned int inputWidth = rInputShape[widthIndex]; - const unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex]; - const unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex]; + const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex]; + const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex]; const std::vector inputVec = rInputDecoder.DecodeTensor(rInputShape); - const std::vector filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthMultiplier, depthwise); + const std::vector filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise); const TensorShape biasShape{outputChannels}; const std::vector biasVec = biasEnabled ? 
pBiasDecoder->DecodeTensor(biasShape) : std::vector(); - unsigned int depthwiseMultiplierIdx = 0; for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) { for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++) @@ -130,13 +132,6 @@ void Convolve(const TensorShape& rInputShape, // For normal, must loop over each input channel. for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++) { - if (depthwise) - { - depthwiseMultiplierIdx = 0; - cInput = cOutput / depthMultiplier; - depthwiseMultiplierIdx = cOutput % depthMultiplier; - } - for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++) { for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++) @@ -147,10 +142,10 @@ void Convolve(const TensorShape& rInputShape, // Since dimensionality of kernel depends on depthwiseness, so does index. if (depthwise) { - filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels + - cInput * filterWidth * filterHeight + - yFilter * filterWidth + - xFilter; + cInput = cOutput / depthMultiplier; + // filterDepth = outputChannels; + filterIndex = xFilter * outputChannels + cOutput + + yFilter * filterWidth * outputChannels; } else { diff --git a/src/backends/reference/workloads/Decoders.hpp b/src/backends/reference/workloads/Decoders.hpp index 0b3f36047d..cd0dc5d40f 100644 --- a/src/backends/reference/workloads/Decoders.hpp +++ b/src/backends/reference/workloads/Decoders.hpp @@ -20,11 +20,7 @@ namespace inline std::unique_ptr> MakeSigned32PerAxisDecoder(const TensorInfo& info, const void* data) { - auto params = armnnUtils::GetPerAxisParams(info); - return std::make_unique( - static_cast(data), - params.second, - params.first); + return std::make_unique(static_cast(data), info); } inline std::unique_ptr> MakeSigned32Decoder(const TensorInfo& info, const void* data) @@ -75,10 +71,7 @@ inline std::unique_ptr> MakeDecoder(const TensorInfo& info, const case armnn::DataType::QuantizedSymm8PerAxis: { std::pair> params = armnnUtils::GetPerAxisParams(info); - return std::make_unique( - static_cast(data), - params.second, - params.first); + return std::make_unique(static_cast(data), info); } ARMNN_NO_DEPRECATE_WARN_END case DataType::QAsymmS8: @@ -123,10 +116,7 @@ inline std::unique_ptr> MakeDecoder(const TensorInfo& info, const if (info.HasPerAxisQuantization()) { std::pair> params = armnnUtils::GetPerAxisParams(info); - return std::make_unique( - static_cast(data), - params.second, - params.first); + return std::make_unique(static_cast(data), info); } else { diff --git a/src/backends/reference/workloads/TransposeConvolution2d.cpp b/src/backends/reference/workloads/TransposeConvolution2d.cpp index 7408e92982..a1a6cbae68 100644 --- a/src/backends/reference/workloads/TransposeConvolution2d.cpp +++ b/src/backends/reference/workloads/TransposeConvolution2d.cpp @@ -137,7 +137,7 @@ void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descript { for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput) { - rBiasesDecoder.SetIndex(dOutput, dOutput); + rBiasesDecoder[dOutput]; for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput) { for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput) -- cgit v1.2.1
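
For illustration, a minimal standalone sketch (plain C++; everything in it is local to the example and is not the conversion utility referred to in the commit message) of how the previous depthwise weights layout [M,I,H,W] relates to the new [1,H,W,I*M] layout. The output-channel ordering cOut = cIn * M + m matches the indexing used by ConvImpl.cpp in this patch, where cInput = cOutput / depthMultiplier.

// Minimal sketch: relate the previous [M,I,H,W] depthwise weights layout to the
// new [1,H,W,I*M] layout. Example-local code, not the conversion function added
// by this change.
#include <cassert>
#include <vector>

int main()
{
    const unsigned int M = 2, I = 3, H = 2, W = 2; // depth multiplier, input channels, kernel H/W
    const unsigned int O = I * M;                  // I*M, the per-axis quantization dimension

    // Previous layout [M,I,H,W], filled with a recognisable pattern.
    std::vector<int> oldLayout(M * I * H * W);
    for (unsigned int i = 0; i < oldLayout.size(); ++i)
    {
        oldLayout[i] = static_cast<int>(i);
    }

    // Rearrange into the new layout [1,H,W,I*M].
    std::vector<int> newLayout(H * W * O);
    for (unsigned int m = 0; m < M; ++m)
    {
        for (unsigned int cIn = 0; cIn < I; ++cIn)
        {
            const unsigned int cOut = cIn * M + m; // channel ordering assumed by ConvImpl.cpp
            for (unsigned int y = 0; y < H; ++y)
            {
                for (unsigned int x = 0; x < W; ++x)
                {
                    const unsigned int oldIdx = m * I * H * W + cIn * H * W + y * W + x;
                    const unsigned int newIdx = y * W * O + x * O + cOut; // filterIndex in ConvImpl.cpp
                    newLayout[newIdx] = oldLayout[oldIdx];
                }
            }
        }
    }

    // Spot check: the weight for (m=1, cIn=2, y=1, x=0) now lives in output channel cOut=5.
    assert(newLayout[1 * W * O + 0 * O + 5] == oldLayout[1 * I * H * W + 2 * H * W + 1 * W + 0]);
    return 0;
}

The per-axis quantization scales attach to the I*M dimension, which is why the new layout can carry one scale per output channel while the previous [M,I,H,W] layout could not.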
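
Along the same lines, a minimal sketch of the axis-index bookkeeping performed by the reworked PerAxisIterator: with axisFactor = GetNumElementsAfter(shape, axis) and axisDimensionality = shape[axis], the scale applied to the element at flat index i is scales[(i / axisFactor) % axisDimensionality]. DequantizePerAxis below is a hypothetical helper written only for this example, not an ArmNN API; with a [6,1,2,2] QSymmS8 tensor quantized along axis 0 it reproduces the expected values of RefPerChannelDecoderTest3.

// Minimal sketch of per-axis dequantization using the same index arithmetic as
// the reworked PerAxisIterator. DequantizePerAxis is example-local, not ArmNN API.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<float> DequantizePerAxis(const std::vector<int8_t>& data,
                                     const std::vector<unsigned int>& shape,  // e.g. {6,1,2,2}
                                     unsigned int axis,                       // quantization dimension
                                     const std::vector<float>& scales)        // one scale per shape[axis]
{
    // axisFactor corresponds to armnnUtils::GetNumElementsAfter(shape, axis)
    unsigned int axisFactor = 1;
    for (unsigned int d = axis + 1; d < shape.size(); ++d)
    {
        axisFactor *= shape[d];
    }
    const unsigned int axisDimensionality = shape[axis];

    std::vector<float> decoded;
    decoded.reserve(data.size());
    for (unsigned int i = 0; i < data.size(); ++i)
    {
        // Equivalent to PerAxisIterator::SetIndexOnMem: pick the scale of the channel owning element i
        const unsigned int axisIndex = (i / axisFactor) % axisDimensionality;
        decoded.push_back(static_cast<float>(data[i]) * scales[axisIndex]); // QSymmS8, zero offset
    }
    return decoded;
}

int main()
{
    // Mirrors RefPerChannelDecoderTest3: [I*M,1,H,W] = [6,1,2,2], scales {1..6}
    std::vector<int8_t> input(24);
    for (unsigned int i = 0; i < input.size(); ++i)
    {
        input[i] = static_cast<int8_t>(i);
    }

    const std::vector<float> decoded = DequantizePerAxis(input, {6, 1, 2, 2}, 0, {1, 2, 3, 4, 5, 6});
    assert(decoded[4]  ==  8.0f);  //  4 * scales[1]
    assert(decoded[23] == 138.0f); // 23 * scales[5]
    return 0;
}

For depthwise weights in the new [1,H,W,I*M] layout the quantization dimension is the last axis, so axisFactor is 1 and consecutive elements simply cycle through the per-channel scales.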