diff options
author | Jan Eilers <jan.eilers@arm.com> | 2021-06-02 12:01:25 +0100 |
---|---|---|
committer | Jan Eilers <jan.eilers@arm.com> | 2021-06-16 11:31:42 +0000 |
commit | 53ef79504b4c881c572735393c2eede5fa556c46 (patch) | |
tree | f6e0cd27c4d03075fa154074c5b12d7c8c3149f7 /src/backends/reference/workloads/BaseIterator.hpp | |
parent | 77fe76bfa8cb798943821d1f3e432c228e1cdee3 (diff) | |
download | armnn-53ef79504b4c881c572735393c2eede5fa556c46.tar.gz |
IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]
* This change is necessary because tflite uses a [1,H,W,I*M] format
and uses the I*M dimension for per-axis quantization. Our previous
layout [M,I,H,W] can't handle the corresponding quantization scales.
* Updates Onnx-, TfLiteParser and TfliteDelegate
* Updates the CpuRef, CpuAcc and GpuAcc backends
* Adjusts unit tests
* Adds test to ensure models with old layout can still be read and
executed
* Adds a conversion function from the new layout back to the previous
layout ([1,H,W,I*M] --> [M,I,H,W]) which can be used by backend developers
!android-nn-driver:5553
Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
Diffstat (limited to 'src/backends/reference/workloads/BaseIterator.hpp')
-rw-r--r-- | src/backends/reference/workloads/BaseIterator.hpp | 180 |
1 files changed, 81 insertions, 99 deletions
diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp index 73e24691d9..483ef720f9 100644 --- a/src/backends/reference/workloads/BaseIterator.hpp +++ b/src/backends/reference/workloads/BaseIterator.hpp @@ -8,7 +8,9 @@ #include <armnn/TypesUtils.hpp> #include <armnn/utility/Assert.hpp> #include <armnn/utility/IgnoreUnused.hpp> +#include <armnn/utility/NumericCast.hpp> #include <armnnUtils/FloatingPointConverter.hpp> +#include <armnnUtils/TensorUtils.hpp> #include <ResolveType.hpp> @@ -22,8 +24,6 @@ public: virtual ~BaseIterator() {} - virtual BaseIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) = 0; - virtual BaseIterator& operator++() = 0; virtual BaseIterator& operator+=(const unsigned int increment) = 0; @@ -47,7 +47,6 @@ public: virtual std::vector<float> DecodeTensor(const TensorShape &tensorShape, - const unsigned int channelMultiplier = 1, bool isDepthwise = false) = 0; }; @@ -108,14 +107,6 @@ public: return *this; } - TypedIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) override - { - IgnoreUnused(axisIndex); - ARMNN_ASSERT(m_Iterator); - m_Iterator = m_Start + index; - return *this; - } - protected: T* m_Iterator; T* m_Start; @@ -135,10 +126,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -173,10 +163,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = 
tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -211,10 +200,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -249,10 +237,9 @@ public: return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset); } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -289,10 +276,9 @@ public: return val; } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -328,10 +314,9 @@ public: return val; } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -365,10 +350,9 @@ public: return *m_Iterator; } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -393,10 +377,9 @@ public: return static_cast<float>(*m_Iterator) * m_Scale; } 
std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -430,10 +413,9 @@ public: return static_cast<float>(*m_Iterator); } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -463,10 +445,9 @@ public: return *m_Iterator; } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -496,10 +477,9 @@ public: return *m_Iterator; } std::vector<float> DecodeTensor (const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -530,10 +510,9 @@ public: } std::vector<float> DecodeTensor(const TensorShape& tensorShape, - const unsigned int channelMultiplier, const bool isDepthwise) override { - IgnoreUnused(channelMultiplier, isDepthwise); + IgnoreUnused(isDepthwise); const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; @@ -769,23 +748,33 @@ public: } }; -// PerAxisIterator for per-axis quantization +/// PerAxisIterator for per-axis quantization. Iterates over a tensor as layed out in memory and keeps track +/// of the axis index. 
template<typename T, typename Base> class PerAxisIterator : public Base { public: - // axisFactor is used to calculate channelStep - PerAxisIterator(T* data = nullptr, unsigned int axisFactor = 0) - : m_Iterator(data), m_Start(data), m_AxisIndex(0), m_AxisFactor(axisFactor) + PerAxisIterator(T* data = nullptr, + unsigned int axisFactor = 0, + unsigned int axisDimensionality=0) + : m_Iterator(data), + m_Start(data), + m_AxisIndex(0), // iterates over the dimension of axis + m_AxisDimensionality(axisDimensionality), // tensorShape[quantization_dim] + m_AxisFactor(axisFactor), + m_Index(0) {} - // This should be called to set index for per-axis Encoder/Decoder - PerAxisIterator& SetIndex(unsigned int index, unsigned int axisIndex) override + PerAxisIterator(T* data = nullptr, + const armnn::TensorShape& tensorShape = TensorShape(), + const unsigned int axis = 0) + : m_Iterator(data), + m_Start(data), + m_AxisIndex(0), + m_Index(0) { - ARMNN_ASSERT(m_Iterator); - m_Iterator = m_Start + index; - m_AxisIndex = axisIndex; - return *this; + m_AxisDimensionality = tensorShape[axis]; + m_AxisFactor = armnnUtils::GetNumElementsAfter(tensorShape, axis); } void Reset(void* data) override @@ -793,37 +782,50 @@ public: m_Iterator = reinterpret_cast<T*>(data); m_Start = m_Iterator; m_AxisIndex = 0; + m_Index = 0; } PerAxisIterator& operator++() override { - ARMNN_ASSERT(m_Iterator); - ++m_Iterator; - m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor; + ++m_Index; + this -> operator[](m_Index); return *this; } PerAxisIterator& operator+=(const unsigned int increment) override { - ARMNN_ASSERT(m_Iterator); - m_Iterator += increment; - m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor; + m_Index += increment; + this -> operator[](m_Index); return *this; } PerAxisIterator& operator-=(const unsigned int decrement) override { - ARMNN_ASSERT(m_Iterator); - m_Iterator -= decrement; - m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor; + 
m_Index -= decrement; + this -> operator[](m_Index); return *this; } - PerAxisIterator& operator[](const unsigned int index) override + + inline PerAxisIterator& SetIndexOnMem(const unsigned int index) { ARMNN_ASSERT(m_Iterator); m_Iterator = m_Start + index; - m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor; + if (index < m_AxisFactor) + { + m_AxisIndex = 0; + } + else + { + m_AxisIndex = (index / m_AxisFactor) % m_AxisDimensionality; + } + m_Index = index; + return *this; + } + + PerAxisIterator& operator[](const unsigned int index) override + { + SetIndexOnMem(index); return *this; } @@ -831,18 +833,22 @@ public: T* m_Iterator; T* m_Start; unsigned int m_AxisIndex; + unsigned int m_AxisDimensionality; // tensorShape[quantization_dim] unsigned int m_AxisFactor; + unsigned int m_Index; }; class QSymm8PerAxisDecoder : public PerAxisIterator<const int8_t, Decoder<float>> { public: - QSymm8PerAxisDecoder(const int8_t* data, const std::vector<float>& scale, unsigned int axisFactor) - : PerAxisIterator(data, axisFactor), m_Scales(scale) {} + QSymm8PerAxisDecoder(const int8_t* data, const armnn::TensorInfo& tensorInfo) + : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()), + m_Scales(tensorInfo.GetQuantizationScales()) + {} float Get() const override { - return armnn::Dequantize(*m_Iterator, m_Scales[m_AxisIndex], 0); + return armnn::Dequantize(*m_Iterator, GetScale(), 0); } // Get scale of the current value @@ -852,37 +858,18 @@ public: } std::vector<float> DecodeTensor(const TensorShape &tensorShape, - const unsigned int channelMultiplier, bool isDepthwise) override { - const uint32_t size = tensorShape.GetNumElements(); - const uint32_t scaleSize = static_cast<uint32_t>(m_Scales.size()); - - const uint32_t stepSize = isDepthwise ? 
- tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0]; - - const uint32_t stepNum = size / (stepSize * channelMultiplier); - uint32_t scale; + IgnoreUnused(isDepthwise); + const unsigned int size = tensorShape.GetNumElements(); std::vector<float> decodedTensor; decodedTensor.reserve(size); - // channelMultiplier is only used in depthwise convolutions and in other cases will have no effect - // stepSize is the length of a contiguous area sharing a quantization scale within a tensor - // stepNum is the number of those steps/blocks in the tensor - for (uint32_t mult = 0; mult < channelMultiplier; ++mult) + for (uint32_t i = 0; i < size; ++i) { - for (uint32_t step = 0; step < stepNum; ++step) - { - scale = (channelMultiplier * step + mult) % scaleSize; - for (uint32_t i = 0; i < stepSize; ++i) - { - unsigned int index = mult * stepSize * channelMultiplier + - step * stepSize + i; - this->operator[](index); - decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0)); - } - } + SetIndexOnMem(i); + decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, GetScale(), 0)); } return decodedTensor; } @@ -920,8 +907,10 @@ private: class ScaledInt32PerAxisDecoder : public PerAxisIterator<const int32_t, Decoder<float>> { public: - ScaledInt32PerAxisDecoder(const int32_t* data, const std::vector<float>& scales, unsigned int axisFactor) - : PerAxisIterator(data, axisFactor), m_Scales(scales) {} + ScaledInt32PerAxisDecoder(const int32_t* data, const armnn::TensorInfo tensorInfo) + : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()), + m_Scales(tensorInfo.GetQuantizationScales()) + {} float Get() const override { @@ -935,17 +924,14 @@ public: } std::vector<float> DecodeTensor(const TensorShape &tensorShape, - const unsigned int channelMultiplier, bool isDepthwise) override { const uint32_t size = tensorShape.GetNumElements(); - const uint32_t scaleSize = static_cast<uint32_t>(m_Scales.size()); 
const uint32_t stepSize = isDepthwise ? tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0]; - const uint32_t stepNum = size / (stepSize * channelMultiplier); - uint32_t scale; + const uint32_t stepNum = size / stepSize; std::vector<float> decodedTensor; decodedTensor.reserve(size); @@ -953,18 +939,14 @@ public: // channelMultiplier is only used in depthwise convolutions and in other cases will have no effect // stepSize is the length of a contiguous area sharing a quantization scale within a tensor // stepNum is the number of those steps/blocks in the tensor - for (uint32_t mult = 0; mult < channelMultiplier; ++mult) + for (uint32_t step = 0; step < stepNum; ++step) { - for (uint32_t step = 0; step < stepNum; ++step) + //scale = (channelMultiplier * step + mult) % scaleSize; + for (uint32_t i = 0; i < stepSize; ++i) { - scale = (channelMultiplier * step + mult) % scaleSize; - for (uint32_t i = 0; i < stepSize; ++i) - { - unsigned int index = mult * stepSize * channelMultiplier + - step * stepSize + i; - this->operator[](index); - decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0)); - } + unsigned int index = step * stepSize + i; + this->operator[](index); + decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[step], 0)); } } return decodedTensor; |