aboutsummaryrefslogtreecommitdiff
path: root/src/backends/reference
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/reference')
-rw-r--r--src/backends/reference/test/CMakeLists.txt2
-rw-r--r--src/backends/reference/test/RefPerAxisIteratorTests.cpp252
-rw-r--r--src/backends/reference/test/RefPerChannelDecoderTests.cpp156
-rw-r--r--src/backends/reference/workloads/BaseIterator.hpp180
-rw-r--r--src/backends/reference/workloads/ConvImpl.cpp31
-rw-r--r--src/backends/reference/workloads/Decoders.hpp16
-rw-r--r--src/backends/reference/workloads/TransposeConvolution2d.cpp2
7 files changed, 508 insertions, 131 deletions
diff --git a/src/backends/reference/test/CMakeLists.txt b/src/backends/reference/test/CMakeLists.txt
index 76541cfdaa..d7c5da896a 100644
--- a/src/backends/reference/test/CMakeLists.txt
+++ b/src/backends/reference/test/CMakeLists.txt
@@ -13,6 +13,8 @@ list(APPEND armnnRefBackendUnitTests_sources
RefLayerTests.cpp
RefMemoryManagerTests.cpp
RefOptimizedNetworkTests.cpp
+ RefPerAxisIteratorTests.cpp
+ RefPerChannelDecoderTests.cpp
RefRuntimeTests.cpp
RefTensorHandleTests.cpp
RefWorkloadFactoryHelper.hpp
diff --git a/src/backends/reference/test/RefPerAxisIteratorTests.cpp b/src/backends/reference/test/RefPerAxisIteratorTests.cpp
new file mode 100644
index 0000000000..7da4c0fb0f
--- /dev/null
+++ b/src/backends/reference/test/RefPerAxisIteratorTests.cpp
@@ -0,0 +1,252 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <reference/workloads/Decoders.hpp>
+#include <armnn/utility/NumericCast.hpp>
+
+#include <fmt/format.h>
+
+#include <boost/test/unit_test.hpp>
+#include <chrono>
+
+
+/// Asserts that two vectors hold the same elements in the same order.
+///
+/// The size comparison is a fatal requirement: a plain BOOST_TEST would only record the
+/// failure and carry on, after which the element loop would index vec2 out of bounds
+/// whenever vec1 is the longer vector.
+/// Every differing element is reported through BOOST_TEST_MESSAGE before the test is
+/// failed once, so a single run shows all mismatches.
+///
+/// @param vec1 Values produced by the code under test.
+/// @param vec2 Expected values.
+template<typename T>
+void CompareVector(const std::vector<T>& vec1, const std::vector<T>& vec2)
+{
+    BOOST_TEST_REQUIRE(vec1.size() == vec2.size());
+
+    bool mismatch = false;
+    for (std::size_t i = 0; i < vec1.size(); ++i)
+    {
+        if (vec1[i] != vec2[i])
+        {
+            BOOST_TEST_MESSAGE(fmt::format("Vector value mismatch: index={} {} != {}",
+                                           i,
+                                           vec1[i],
+                                           vec2[i]));
+            mismatch = true;
+        }
+    }
+
+    if (mismatch)
+    {
+        BOOST_FAIL("Error in CompareVector. Vectors don't match.");
+    }
+}
+
+using namespace armnn;
+
+/// Test double for PerAxisIterator: behaves like a per-axis decoder but returns the raw
+/// int8_t element without applying any scale, so tests can observe the iteration order
+/// and the tracked axis index directly.
+class MockPerAxisIterator : public PerAxisIterator<const int8_t, Decoder<int8_t>>
+{
+public:
+    /// @param data        Start of the tensor data to iterate over.
+    /// @param tensorShape Shape of the tensor; also used to record the element count.
+    /// @param axis        Axis whose index the iterator keeps track of.
+    MockPerAxisIterator(const int8_t* data, const armnn::TensorShape& tensorShape, const unsigned int axis)
+        : PerAxisIterator(data, tensorShape, axis), m_NumElements(tensorShape.GetNumElements())
+    {}
+
+    /// Returns the element the iterator currently points at, undecoded.
+    int8_t Get() const override
+    {
+        return *m_Iterator;
+    }
+
+    /// Required by the Decoder interface but irrelevant for these tests; always returns
+    /// an empty vector.
+    std::vector<float> DecodeTensor(const TensorShape &tensorShape,
+                                    bool isDepthwise = false) override
+    {
+        IgnoreUnused(tensorShape, isDepthwise);
+        return {};
+    }
+
+    /// Visits every element through operator[] (the same access pattern DecodeTensor
+    /// uses) and returns the values in visiting order.
+    std::vector<int8_t> Loop()
+    {
+        std::vector<int8_t> vec;
+        vec.reserve(m_NumElements);
+        for (uint32_t i = 0; i < m_NumElements; ++i)
+        {
+            this->operator[](i);
+            vec.emplace_back(Get());
+        }
+        return vec;
+    }
+
+    /// Exposes the axis index computed by the most recent positioning operation.
+    unsigned int GetAxisIndex() const
+    {
+        return m_AxisIndex;
+    }
+
+    /// Number of elements in the tensor passed to the constructor.
+    unsigned int m_NumElements;
+};
+
+BOOST_AUTO_TEST_SUITE(RefPerAxisIterator)
+
+// Axis 0 on a [3,1,2,2] tensor: Loop() (the stand-in for DecodeTensor) must return the data
+// in memory order, and random access must report the matching index along axis 0.
+BOOST_AUTO_TEST_CASE(PerAxisIteratorTest1)
+{
+    const std::vector<int8_t> data     = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const std::vector<int8_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const TensorInfo info({3,1,2,2}, DataType::QSymmS8);
+
+    // Walk the whole tensor with axis=0
+    MockPerAxisIterator iterator(data.data(), info.GetShape(), 0);
+    const std::vector<int8_t> visited = iterator.Loop();
+    CompareVector(visited, expected);
+
+    // Jump to individual elements and verify the axis index that goes with each
+    iterator[5];
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+
+    iterator[1];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+
+    iterator[10];
+    BOOST_TEST(iterator.GetAxisIndex() == 2u);
+}
+
+// Axis 1 on a [3,1,2,2] tensor: that axis has a single entry, so every element is expected
+// to report axis index 0.
+BOOST_AUTO_TEST_CASE(PerAxisIteratorTest2)
+{
+    const std::vector<int8_t> data     = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const std::vector<int8_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const TensorInfo info({3,1,2,2}, DataType::QSymmS8);
+
+    // Walk the whole tensor with axis=1
+    MockPerAxisIterator iterator(data.data(), info.GetShape(), 1);
+    const std::vector<int8_t> visited = iterator.Loop();
+    CompareVector(visited, expected);
+
+    // Jump to individual elements and verify the axis index that goes with each
+    iterator[5];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+
+    iterator[1];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+
+    iterator[10];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+}
+
+// Axis 2 on a [3,1,2,2] tensor: the axis index is expected to switch every two elements
+// and wrap around after two entries.
+BOOST_AUTO_TEST_CASE(PerAxisIteratorTest3)
+{
+    const std::vector<int8_t> data     = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const std::vector<int8_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const TensorInfo info({3,1,2,2}, DataType::QSymmS8);
+
+    // Walk the whole tensor with axis=2
+    MockPerAxisIterator iterator(data.data(), info.GetShape(), 2);
+    const std::vector<int8_t> visited = iterator.Loop();
+    CompareVector(visited, expected);
+
+    // Jump to individual elements and verify the axis index that goes with each
+    iterator[5];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+
+    iterator[1];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+
+    iterator[10];
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+}
+
+// Axis 3 (innermost) on a [3,1,2,2] tensor: the axis index is expected to alternate
+// between 0 and 1 on consecutive elements.
+BOOST_AUTO_TEST_CASE(PerAxisIteratorTest4)
+{
+    const std::vector<int8_t> data     = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const std::vector<int8_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const TensorInfo info({3,1,2,2}, DataType::QSymmS8);
+
+    // Walk the whole tensor with axis=3
+    MockPerAxisIterator iterator(data.data(), info.GetShape(), 3);
+    const std::vector<int8_t> visited = iterator.Loop();
+    CompareVector(visited, expected);
+
+    // Jump to individual elements and verify the axis index that goes with each
+    iterator[5];
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+
+    iterator[1];
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+
+    iterator[10];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+}
+
+
+// Axis 1 again, this time on a [2,2,2,2] tensor so that the tracked axis has more than
+// one entry and the index has to wrap.
+BOOST_AUTO_TEST_CASE(PerAxisIteratorTest5)
+{
+    using namespace armnn;
+
+    const std::vector<int8_t> data =
+    {
+         0,  1,  2,  3,
+         4,  5,  6,  7,
+         8,  9, 10, 11,
+        12, 13, 14, 15
+    };
+
+    const std::vector<int8_t> expected =
+    {
+         0,  1,  2,  3,
+         4,  5,  6,  7,
+         8,  9, 10, 11,
+        12, 13, 14, 15
+    };
+
+    const TensorInfo info({2,2,2,2}, DataType::QSymmS8);
+    MockPerAxisIterator iterator(data.data(), info.GetShape(), 1);
+    const std::vector<int8_t> visited = iterator.Loop();
+    CompareVector(visited, expected);
+
+    // Jump to individual elements and verify the axis index that goes with each
+    iterator[5];
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+
+    iterator[1];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+
+    iterator[10];
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+}
+
+// Test the compound increment (+=) and decrement (-=) operators: after each move both the
+// element under the iterator and the tracked axis index (axis 2 of a [3,1,2,2] tensor)
+// must be correct.
+BOOST_AUTO_TEST_CASE(PerAxisIteratorTest7)
+{
+    using namespace armnn;
+    std::vector<int8_t> input =
+    {
+        0, 1, 2, 3,
+        4, 5, 6, 7,
+        8, 9, 10, 11
+    };
+
+    std::vector<int8_t> expOutput =
+    {
+        0, 1, 2, 3,
+        4, 5, 6, 7,
+        8, 9, 10, 11
+    };
+
+    TensorInfo tensorInfo ({3,1,2,2},DataType::QSymmS8);
+    auto iterator = MockPerAxisIterator(input.data(), tensorInfo.GetShape(), 2);
+
+    // NOTE: the value checks must be written as a comparison expression. The second
+    // argument of BOOST_TEST is an optional modifier/message, so
+    // BOOST_TEST(iterator.Get(), expected) never compared against the expected value.
+    iterator += 3;                                  // index 3
+    BOOST_TEST(iterator.Get() == expOutput[3]);
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+
+    iterator += 3;                                  // index 6
+    BOOST_TEST(iterator.Get() == expOutput[6]);
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+
+    iterator -= 2;                                  // index 4
+    BOOST_TEST(iterator.Get() == expOutput[4]);
+    BOOST_TEST(iterator.GetAxisIndex() == 0u);
+
+    iterator -= 1;                                  // index 3
+    BOOST_TEST(iterator.Get() == expOutput[3]);
+    BOOST_TEST(iterator.GetAxisIndex() == 1u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
diff --git a/src/backends/reference/test/RefPerChannelDecoderTests.cpp b/src/backends/reference/test/RefPerChannelDecoderTests.cpp
new file mode 100644
index 0000000000..c2e3cee7a0
--- /dev/null
+++ b/src/backends/reference/test/RefPerChannelDecoderTests.cpp
@@ -0,0 +1,156 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <reference/workloads/Decoders.hpp>
+#include <armnn/utility/NumericCast.hpp>
+
+#include <fmt/format.h>
+
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(RefPerChannelDecoder)
+
+/// Asserts that two vectors hold the same elements in the same order.
+///
+/// The size comparison is a fatal requirement: a plain BOOST_TEST would only record the
+/// failure and carry on, after which the element loop would index vec2 out of bounds
+/// whenever vec1 is the longer vector.
+/// Every differing element is reported through BOOST_TEST_MESSAGE before the test is
+/// failed once, so a single run shows all mismatches.
+///
+/// @param vec1 Values produced by the code under test.
+/// @param vec2 Expected values.
+template<typename T>
+void CompareVector(const std::vector<T>& vec1, const std::vector<T>& vec2)
+{
+    BOOST_TEST_REQUIRE(vec1.size() == vec2.size());
+
+    bool mismatch = false;
+    for (std::size_t i = 0; i < vec1.size(); ++i)
+    {
+        if (vec1[i] != vec2[i])
+        {
+            BOOST_TEST_MESSAGE(fmt::format("Vector value mismatch: index={} {} != {}",
+                                           i,
+                                           vec1[i],
+                                           vec2[i]));
+            mismatch = true;
+        }
+    }
+
+    if (mismatch)
+    {
+        BOOST_FAIL("Error in CompareVector. Vectors don't match.");
+    }
+}
+
+// Per-channel dequantization for a non-depthwise convolution weight tensor:
+// two scales along dimension 0, so the first 12 values are multiplied by 1 and the last 12 by 2.
+BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest1)
+{
+    using namespace armnn;
+
+    std::vector<int8_t> quantized =
+    {
+         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
+        12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
+    };
+
+    std::vector<float> expected =
+    {
+         0.0f,  1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f,  8.0f,  9.0f, 10.0f, 11.0f,
+        24.0f, 26.0f, 28.0f, 30.0f, 32.0f, 34.0f, 36.0f, 38.0f, 40.0f, 42.0f, 44.0f, 46.0f
+    };
+
+    TensorInfo info({2,2,2,3}, DataType::QSymmS8, {1.0f, 2.0f}, 0);
+    auto decoder = MakeDecoder<float>(info, quantized.data());
+
+    std::vector<float> decoded = decoder->DecodeTensor(info.GetShape());
+
+    CompareVector(decoded, expected);
+}
+
+// Per-channel dequantization for depthwise convolution weights with channel multiplier M=1:
+// four scales along dimension 0, one per block of four values.
+BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest2)
+{
+    using namespace armnn;
+
+    std::vector<int8_t> quantized =
+    {
+         0,  1,  2,  3,
+         4,  5,  6,  7,
+         8,  9, 10, 11,
+        12, 13, 14, 15
+    };
+
+    std::vector<float> expected =
+    {
+         0.0f,  1.0f,  2.0f,  3.0f,
+         8.0f, 10.0f, 12.0f, 14.0f,
+        24.0f, 27.0f, 30.0f, 33.0f,
+        48.0f, 52.0f, 56.0f, 60.0f
+    };
+
+    // [O,1,H,W] = [I*M,1,H,W] = [4*1,1,2,2]
+    TensorInfo info({4,1,2,2}, DataType::QSymmS8, {1.0f, 2.0f, 3.0f, 4.0f}, 0);
+    auto decoder = MakeDecoder<float>(info, quantized.data());
+
+    // Second argument marks the tensor as depthwise weights
+    std::vector<float> decoded = decoder->DecodeTensor(info.GetShape(), true);
+
+    CompareVector(decoded, expected);
+}
+
+// Per-channel dequantization for depthwise convolution weights with channel multiplier M=2:
+// six scales along dimension 0, one per block of four values.
+BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest3)
+{
+    using namespace armnn;
+
+    std::vector<int8_t> quantized =
+    {
+         0,  1,  2,  3,
+         4,  5,  6,  7,
+         8,  9, 10, 11,
+        12, 13, 14, 15,
+        16, 17, 18, 19,
+        20, 21, 22, 23
+    };
+
+    std::vector<float> expected =
+    {
+          0.0f,   1.0f,   2.0f,   3.0f,
+          8.0f,  10.0f,  12.0f,  14.0f,
+         24.0f,  27.0f,  30.0f,  33.0f,
+         48.0f,  52.0f,  56.0f,  60.0f,
+         80.0f,  85.0f,  90.0f,  95.0f,
+        120.0f, 126.0f, 132.0f, 138.0f
+    };
+
+    // [O,1,H,W] = [I*M,1,H,W] = [3*2,1,2,2]
+    TensorInfo info({6,1,2,2}, DataType::QSymmS8, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}, 0);
+    auto decoder = MakeDecoder<float>(info, quantized.data());
+
+    // Second argument marks the tensor as depthwise weights
+    std::vector<float> decoded = decoder->DecodeTensor(info.GetShape(), true);
+
+    CompareVector(decoded, expected);
+}
+
+// Same scenario as the M=2 depthwise case, but with 32-bit signed input data so that the
+// Signed32 per-axis decoder is exercised.
+BOOST_AUTO_TEST_CASE(RefPerChannelDecoderTest4)
+{
+    using namespace armnn;
+
+    std::vector<int32_t> quantized =
+    {
+         0,  1,  2,  3,
+         4,  5,  6,  7,
+         8,  9, 10, 11,
+        12, 13, 14, 15,
+        16, 17, 18, 19,
+        20, 21, 22, 23
+    };
+
+    std::vector<float> expected =
+    {
+          0.0f,   1.0f,   2.0f,   3.0f,
+          8.0f,  10.0f,  12.0f,  14.0f,
+         24.0f,  27.0f,  30.0f,  33.0f,
+         48.0f,  52.0f,  56.0f,  60.0f,
+         80.0f,  85.0f,  90.0f,  95.0f,
+        120.0f, 126.0f, 132.0f, 138.0f
+    };
+
+    // [O,1,H,W] = [I*M,1,H,W] = [3*2,1,2,2]
+    TensorInfo info({6,1,2,2}, DataType::Signed32, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}, 0);
+    auto decoder = MakeDecoder<float>(info, quantized.data());
+
+    // Second argument marks the tensor as depthwise weights
+    std::vector<float> decoded = decoder->DecodeTensor(info.GetShape(), true);
+
+    CompareVector(decoded, expected);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp
index 73e24691d9..483ef720f9 100644
--- a/src/backends/reference/workloads/BaseIterator.hpp
+++ b/src/backends/reference/workloads/BaseIterator.hpp
@@ -8,7 +8,9 @@
#include <armnn/TypesUtils.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/NumericCast.hpp>
#include <armnnUtils/FloatingPointConverter.hpp>
+#include <armnnUtils/TensorUtils.hpp>
#include <ResolveType.hpp>
@@ -22,8 +24,6 @@ public:
virtual ~BaseIterator() {}
- virtual BaseIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) = 0;
-
virtual BaseIterator& operator++() = 0;
virtual BaseIterator& operator+=(const unsigned int increment) = 0;
@@ -47,7 +47,6 @@ public:
virtual std::vector<float>
DecodeTensor(const TensorShape &tensorShape,
- const unsigned int channelMultiplier = 1,
bool isDepthwise = false) = 0;
};
@@ -108,14 +107,6 @@ public:
return *this;
}
- TypedIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) override
- {
- IgnoreUnused(axisIndex);
- ARMNN_ASSERT(m_Iterator);
- m_Iterator = m_Start + index;
- return *this;
- }
-
protected:
T* m_Iterator;
T* m_Start;
@@ -135,10 +126,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -173,10 +163,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -211,10 +200,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -249,10 +237,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -289,10 +276,9 @@ public:
return val;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -328,10 +314,9 @@ public:
return val;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -365,10 +350,9 @@ public:
return *m_Iterator;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -393,10 +377,9 @@ public:
return static_cast<float>(*m_Iterator) * m_Scale;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -430,10 +413,9 @@ public:
return static_cast<float>(*m_Iterator);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -463,10 +445,9 @@ public:
return *m_Iterator;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -496,10 +477,9 @@ public:
return *m_Iterator;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -530,10 +510,9 @@ public:
}
std::vector<float> DecodeTensor(const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -769,23 +748,33 @@ public:
}
};
-// PerAxisIterator for per-axis quantization
+/// PerAxisIterator for per-axis quantization. Iterates over a tensor as laid out in memory and keeps track
+/// of the axis index.
template<typename T, typename Base>
class PerAxisIterator : public Base
{
public:
- // axisFactor is used to calculate channelStep
- PerAxisIterator(T* data = nullptr, unsigned int axisFactor = 0)
- : m_Iterator(data), m_Start(data), m_AxisIndex(0), m_AxisFactor(axisFactor)
+ PerAxisIterator(T* data = nullptr,
+ unsigned int axisFactor = 0,
+ unsigned int axisDimensionality=0)
+ : m_Iterator(data),
+ m_Start(data),
+ m_AxisIndex(0), // iterates over the dimension of axis
+ m_AxisDimensionality(axisDimensionality), // tensorShape[quantization_dim]
+ m_AxisFactor(axisFactor),
+ m_Index(0)
{}
- // This should be called to set index for per-axis Encoder/Decoder
- PerAxisIterator& SetIndex(unsigned int index, unsigned int axisIndex) override
+ /// Constructs an iterator whose axis bookkeeping is derived from a tensor shape:
+ /// the axis dimensionality is tensorShape[axis] and the axis factor is
+ /// armnnUtils::GetNumElementsAfter(tensorShape, axis).
+ /// data and tensorShape intentionally have no default values: with defaults on every
+ /// parameter this overload was indistinguishable from the (data, axisFactor,
+ /// axisDimensionality) overload for calls with zero or one argument (ambiguous call),
+ /// and a default-constructed TensorShape would be indexed in the body below.
+ PerAxisIterator(T* data,
+ const armnn::TensorShape& tensorShape,
+ const unsigned int axis = 0)
+ : m_Iterator(data),
+ m_Start(data),
+ m_AxisIndex(0),
+ m_Index(0)
 {
- ARMNN_ASSERT(m_Iterator);
- m_Iterator = m_Start + index;
- m_AxisIndex = axisIndex;
- return *this;
+ m_AxisDimensionality = tensorShape[axis];
+ m_AxisFactor = armnnUtils::GetNumElementsAfter(tensorShape, axis);
 }
void Reset(void* data) override
@@ -793,37 +782,50 @@ public:
m_Iterator = reinterpret_cast<T*>(data);
m_Start = m_Iterator;
m_AxisIndex = 0;
+ m_Index = 0;
}
PerAxisIterator& operator++() override
{
- ARMNN_ASSERT(m_Iterator);
- ++m_Iterator;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ ++m_Index;
+ this -> operator[](m_Index);
return *this;
}
PerAxisIterator& operator+=(const unsigned int increment) override
{
- ARMNN_ASSERT(m_Iterator);
- m_Iterator += increment;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ m_Index += increment;
+ this -> operator[](m_Index);
return *this;
}
PerAxisIterator& operator-=(const unsigned int decrement) override
{
- ARMNN_ASSERT(m_Iterator);
- m_Iterator -= decrement;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ m_Index -= decrement;
+ this -> operator[](m_Index);
return *this;
}
- PerAxisIterator& operator[](const unsigned int index) override
+
+ /// Positions the iterator on element `index` (counted from m_Start) and updates the
+ /// tracked axis index to (index / m_AxisFactor) % m_AxisDimensionality.
+ /// Both m_AxisFactor and m_AxisDimensionality default to 0 in the
+ /// (data, axisFactor, axisDimensionality) constructor; dividing or taking the modulo by
+ /// zero is undefined behaviour, so in that case the axis index is left at 0.
+ /// NOTE(review): falling back to axis 0 mirrors the previous result for
+ /// index < m_AxisFactor - confirm this is the desired behaviour for iterators built
+ /// without a dimensionality.
+ inline PerAxisIterator& SetIndexOnMem(const unsigned int index)
 {
 ARMNN_ASSERT(m_Iterator);
 m_Iterator = m_Start + index;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ if (m_AxisFactor == 0 || m_AxisDimensionality == 0)
+ {
+ m_AxisIndex = 0;
+ }
+ else
+ {
+ // For index < m_AxisFactor the quotient is 0, so this also yields axis index 0.
+ m_AxisIndex = (index / m_AxisFactor) % m_AxisDimensionality;
+ }
+ m_Index = index;
+ return *this;
+ }
+
+ PerAxisIterator& operator[](const unsigned int index) override
+ {
+ SetIndexOnMem(index);
return *this;
}
@@ -831,18 +833,22 @@ public:
T* m_Iterator;
T* m_Start;
unsigned int m_AxisIndex;
+ unsigned int m_AxisDimensionality; // tensorShape[quantization_dim]
unsigned int m_AxisFactor;
+ unsigned int m_Index;
};
class QSymm8PerAxisDecoder : public PerAxisIterator<const int8_t, Decoder<float>>
{
public:
- QSymm8PerAxisDecoder(const int8_t* data, const std::vector<float>& scale, unsigned int axisFactor)
- : PerAxisIterator(data, axisFactor), m_Scales(scale) {}
+ QSymm8PerAxisDecoder(const int8_t* data, const armnn::TensorInfo& tensorInfo)
+ : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()),
+ m_Scales(tensorInfo.GetQuantizationScales())
+ {}
float Get() const override
{
- return armnn::Dequantize(*m_Iterator, m_Scales[m_AxisIndex], 0);
+ return armnn::Dequantize(*m_Iterator, GetScale(), 0);
}
// Get scale of the current value
@@ -852,37 +858,18 @@ public:
}
std::vector<float> DecodeTensor(const TensorShape &tensorShape,
- const unsigned int channelMultiplier,
bool isDepthwise) override
{
- const uint32_t size = tensorShape.GetNumElements();
- const uint32_t scaleSize = static_cast<uint32_t>(m_Scales.size());
-
- const uint32_t stepSize = isDepthwise ?
- tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0];
-
- const uint32_t stepNum = size / (stepSize * channelMultiplier);
- uint32_t scale;
+ IgnoreUnused(isDepthwise);
+ const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
decodedTensor.reserve(size);
- // channelMultiplier is only used in depthwise convolutions and in other cases will have no effect
- // stepSize is the length of a contiguous area sharing a quantization scale within a tensor
- // stepNum is the number of those steps/blocks in the tensor
- for (uint32_t mult = 0; mult < channelMultiplier; ++mult)
+ for (uint32_t i = 0; i < size; ++i)
{
- for (uint32_t step = 0; step < stepNum; ++step)
- {
- scale = (channelMultiplier * step + mult) % scaleSize;
- for (uint32_t i = 0; i < stepSize; ++i)
- {
- unsigned int index = mult * stepSize * channelMultiplier +
- step * stepSize + i;
- this->operator[](index);
- decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0));
- }
- }
+ SetIndexOnMem(i);
+ decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, GetScale(), 0));
}
return decodedTensor;
}
@@ -920,8 +907,10 @@ private:
class ScaledInt32PerAxisDecoder : public PerAxisIterator<const int32_t, Decoder<float>>
{
public:
- ScaledInt32PerAxisDecoder(const int32_t* data, const std::vector<float>& scales, unsigned int axisFactor)
- : PerAxisIterator(data, axisFactor), m_Scales(scales) {}
+ /// Builds a per-axis decoder over `data`, taking the tensor shape, the quantization
+ /// dimension and the per-axis scales from tensorInfo.
+ /// tensorInfo is taken by const reference (as in QSymm8PerAxisDecoder) so that creating
+ /// a decoder does not copy the whole TensorInfo.
+ /// NOTE(review): GetQuantizationDim().value() presumably requires the quantization
+ /// dimension to be set - confirm this is only used for per-axis quantized tensors.
+ ScaledInt32PerAxisDecoder(const int32_t* data, const armnn::TensorInfo& tensorInfo)
+ : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()),
+ m_Scales(tensorInfo.GetQuantizationScales())
+ {}
float Get() const override
{
@@ -935,17 +924,14 @@ public:
}
std::vector<float> DecodeTensor(const TensorShape &tensorShape,
- const unsigned int channelMultiplier,
bool isDepthwise) override
{
const uint32_t size = tensorShape.GetNumElements();
- const uint32_t scaleSize = static_cast<uint32_t>(m_Scales.size());
const uint32_t stepSize = isDepthwise ?
tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0];
- const uint32_t stepNum = size / (stepSize * channelMultiplier);
- uint32_t scale;
+ const uint32_t stepNum = size / stepSize;
std::vector<float> decodedTensor;
decodedTensor.reserve(size);
@@ -953,18 +939,14 @@ public:
// channelMultiplier is only used in depthwise convolutions and in other cases will have no effect
// stepSize is the length of a contiguous area sharing a quantization scale within a tensor
// stepNum is the number of those steps/blocks in the tensor
- for (uint32_t mult = 0; mult < channelMultiplier; ++mult)
+ for (uint32_t step = 0; step < stepNum; ++step)
{
- for (uint32_t step = 0; step < stepNum; ++step)
+ //scale = (channelMultiplier * step + mult) % scaleSize;
+ for (uint32_t i = 0; i < stepSize; ++i)
{
- scale = (channelMultiplier * step + mult) % scaleSize;
- for (uint32_t i = 0; i < stepSize; ++i)
- {
- unsigned int index = mult * stepSize * channelMultiplier +
- step * stepSize + i;
- this->operator[](index);
- decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0));
- }
+ unsigned int index = step * stepSize + i;
+ this->operator[](index);
+ decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[step], 0));
}
}
return decodedTensor;
diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp
index d7845535df..e1bbc6bc52 100644
--- a/src/backends/reference/workloads/ConvImpl.cpp
+++ b/src/backends/reference/workloads/ConvImpl.cpp
@@ -95,9 +95,12 @@ void Convolve(const TensorShape& rInputShape,
const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
- const unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
- const unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
- const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];
+ // Weights layout:
+ // Conv2d: [O,H,W,I]
+ // Depthwise: [1,H,W,O]
+ const unsigned int inputChannels = rInputShape[channelsIndex];
+ const unsigned int outputChannels = rOutputShape[channelsIndex];
+ const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
const unsigned int batchSize = rOutputShape[0];
const unsigned int outputHeight = rOutputShape[heightIndex];
@@ -105,16 +108,15 @@ void Convolve(const TensorShape& rInputShape,
const unsigned int inputHeight = rInputShape[heightIndex];
const unsigned int inputWidth = rInputShape[widthIndex];
- const unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
- const unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];
+ const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
+ const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
- const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthMultiplier, depthwise);
+ const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
const TensorShape biasShape{outputChannels};
const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
- unsigned int depthwiseMultiplierIdx = 0;
for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
{
for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
@@ -130,13 +132,6 @@ void Convolve(const TensorShape& rInputShape,
// For normal, must loop over each input channel.
for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
{
- if (depthwise)
- {
- depthwiseMultiplierIdx = 0;
- cInput = cOutput / depthMultiplier;
- depthwiseMultiplierIdx = cOutput % depthMultiplier;
- }
-
for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
{
for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
@@ -147,10 +142,10 @@ void Convolve(const TensorShape& rInputShape,
// Since dimensionality of kernel depends on depthwiseness, so does index.
if (depthwise)
{
- filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
- cInput * filterWidth * filterHeight +
- yFilter * filterWidth +
- xFilter;
+ cInput = cOutput / depthMultiplier;
+ // filterDepth = outputChannels;
+ filterIndex = xFilter * outputChannels + cOutput +
+ yFilter * filterWidth * outputChannels;
}
else
{
diff --git a/src/backends/reference/workloads/Decoders.hpp b/src/backends/reference/workloads/Decoders.hpp
index 0b3f36047d..cd0dc5d40f 100644
--- a/src/backends/reference/workloads/Decoders.hpp
+++ b/src/backends/reference/workloads/Decoders.hpp
@@ -20,11 +20,7 @@ namespace
inline std::unique_ptr<Decoder<float>> MakeSigned32PerAxisDecoder(const TensorInfo& info, const void* data)
{
- auto params = armnnUtils::GetPerAxisParams(info);
- return std::make_unique<ScaledInt32PerAxisDecoder>(
- static_cast<const int32_t*>(data),
- params.second,
- params.first);
+ return std::make_unique<ScaledInt32PerAxisDecoder>(static_cast<const int32_t*>(data), info);
}
inline std::unique_ptr<Decoder<float>> MakeSigned32Decoder(const TensorInfo& info, const void* data)
@@ -75,10 +71,7 @@ inline std::unique_ptr<Decoder<float>> MakeDecoder(const TensorInfo& info, const
case armnn::DataType::QuantizedSymm8PerAxis:
{
std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
- return std::make_unique<QSymm8PerAxisDecoder>(
- static_cast<const int8_t*>(data),
- params.second,
- params.first);
+ return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
}
ARMNN_NO_DEPRECATE_WARN_END
case DataType::QAsymmS8:
@@ -123,10 +116,7 @@ inline std::unique_ptr<Decoder<float>> MakeDecoder(const TensorInfo& info, const
if (info.HasPerAxisQuantization())
{
std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
- return std::make_unique<QSymm8PerAxisDecoder>(
- static_cast<const int8_t*>(data),
- params.second,
- params.first);
+ return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
}
else
{
diff --git a/src/backends/reference/workloads/TransposeConvolution2d.cpp b/src/backends/reference/workloads/TransposeConvolution2d.cpp
index 7408e92982..a1a6cbae68 100644
--- a/src/backends/reference/workloads/TransposeConvolution2d.cpp
+++ b/src/backends/reference/workloads/TransposeConvolution2d.cpp
@@ -137,7 +137,7 @@ void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descript
{
for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
{
- rBiasesDecoder.SetIndex(dOutput, dOutput);
+ rBiasesDecoder[dOutput];
for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
{
for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)