path: root/src/backends/reference/workloads
author    Jan Eilers <jan.eilers@arm.com>  2021-06-02 12:01:25 +0100
committer Jan Eilers <jan.eilers@arm.com>  2021-06-16 11:31:42 +0000
commit    53ef79504b4c881c572735393c2eede5fa556c46 (patch)
tree      f6e0cd27c4d03075fa154074c5b12d7c8c3149f7 /src/backends/reference/workloads
parent    77fe76bfa8cb798943821d1f3e432c228e1cdee3 (diff)
download  armnn-53ef79504b4c881c572735393c2eede5fa556c46.tar.gz
IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]
* This change is necessary because tflite uses a [1,H,W,I*M] format and uses the I*M dimension for per axis quantization. Our previous layout [M,I,H,W] can't handle the correlating quantization scales.
* Updates Onnx-, TfLiteParser and TfliteDelegate
* Updates the CpuRef, CpuAcc and GpuAcc backends
* Adjusts unit tests
* Adds test to ensure models with old layout can still be read and executed
* Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W] which can be used by backend developers

!android-nn-driver:5553

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
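The layout conversion mentioned in the last bullet can be pictured with a small standalone sketch. This is not the Arm NN helper added by the patch: the function name, the use of std::vector, and the assumption that output channel o corresponds to input channel i = o / M and multiplier m = o % M (the TFLite depthwise convention) are illustrative only.

    // Hypothetical sketch: rearrange depthwise weights from the new layout
    // [1, H, W, I*M] back to the previous layout [M, I, H, W].
    #include <cassert>
    #include <cstddef>
    #include <vector>

    template <typename T>
    std::vector<T> Convert1HWIMToMIHW(const std::vector<T>& src,
                                      std::size_t H, std::size_t W,
                                      std::size_t I, std::size_t M)
    {
        assert(src.size() == H * W * I * M);
        std::vector<T> dst(src.size());
        for (std::size_t h = 0; h < H; ++h)
        {
            for (std::size_t w = 0; w < W; ++w)
            {
                for (std::size_t i = 0; i < I; ++i)
                {
                    for (std::size_t m = 0; m < M; ++m)
                    {
                        // Source element (h, w, o) with channel o = i * M + m.
                        const std::size_t srcIdx = (h * W + w) * I * M + i * M + m;
                        // Destination element (m, i, h, w).
                        const std::size_t dstIdx = ((m * I + i) * H + h) * W + w;
                        dst[dstIdx] = src[srcIdx];
                    }
                }
            }
        }
        return dst;
    }

The per-axis quantization scales live on the I*M dimension of the source layout, which is why the DecodeTensor overloads in this patch no longer take a separate channelMultiplier argument.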
Diffstat (limited to 'src/backends/reference/workloads')
-rw-r--r--  src/backends/reference/workloads/BaseIterator.hpp            180
-rw-r--r--  src/backends/reference/workloads/ConvImpl.cpp                 31
-rw-r--r--  src/backends/reference/workloads/Decoders.hpp                 16
-rw-r--r--  src/backends/reference/workloads/TransposeConvolution2d.cpp    2
4 files changed, 98 insertions, 131 deletions
diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp
index 73e24691d9..483ef720f9 100644
--- a/src/backends/reference/workloads/BaseIterator.hpp
+++ b/src/backends/reference/workloads/BaseIterator.hpp
@@ -8,7 +8,9 @@
#include <armnn/TypesUtils.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/NumericCast.hpp>
#include <armnnUtils/FloatingPointConverter.hpp>
+#include <armnnUtils/TensorUtils.hpp>
#include <ResolveType.hpp>
@@ -22,8 +24,6 @@ public:
virtual ~BaseIterator() {}
- virtual BaseIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) = 0;
-
virtual BaseIterator& operator++() = 0;
virtual BaseIterator& operator+=(const unsigned int increment) = 0;
@@ -47,7 +47,6 @@ public:
virtual std::vector<float>
DecodeTensor(const TensorShape &tensorShape,
- const unsigned int channelMultiplier = 1,
bool isDepthwise = false) = 0;
};
@@ -108,14 +107,6 @@ public:
return *this;
}
- TypedIterator& SetIndex(unsigned int index, unsigned int axisIndex = 0) override
- {
- IgnoreUnused(axisIndex);
- ARMNN_ASSERT(m_Iterator);
- m_Iterator = m_Start + index;
- return *this;
- }
-
protected:
T* m_Iterator;
T* m_Start;
@@ -135,10 +126,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -173,10 +163,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -211,10 +200,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -249,10 +237,9 @@ public:
return armnn::Dequantize(*m_Iterator, m_Scale, m_Offset);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -289,10 +276,9 @@ public:
return val;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -328,10 +314,9 @@ public:
return val;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -365,10 +350,9 @@ public:
return *m_Iterator;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -393,10 +377,9 @@ public:
return static_cast<float>(*m_Iterator) * m_Scale;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -430,10 +413,9 @@ public:
return static_cast<float>(*m_Iterator);
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -463,10 +445,9 @@ public:
return *m_Iterator;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -496,10 +477,9 @@ public:
return *m_Iterator;
}
std::vector<float> DecodeTensor (const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -530,10 +510,9 @@ public:
}
std::vector<float> DecodeTensor(const TensorShape& tensorShape,
- const unsigned int channelMultiplier,
const bool isDepthwise) override
{
- IgnoreUnused(channelMultiplier, isDepthwise);
+ IgnoreUnused(isDepthwise);
const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
@@ -769,23 +748,33 @@ public:
}
};
-// PerAxisIterator for per-axis quantization
+/// PerAxisIterator for per-axis quantization. Iterates over a tensor as laid out in memory and keeps track
+/// of the axis index.
template<typename T, typename Base>
class PerAxisIterator : public Base
{
public:
- // axisFactor is used to calculate channelStep
- PerAxisIterator(T* data = nullptr, unsigned int axisFactor = 0)
- : m_Iterator(data), m_Start(data), m_AxisIndex(0), m_AxisFactor(axisFactor)
+ PerAxisIterator(T* data = nullptr,
+ unsigned int axisFactor = 0,
+ unsigned int axisDimensionality=0)
+ : m_Iterator(data),
+ m_Start(data),
+ m_AxisIndex(0), // iterates over the dimension of axis
+ m_AxisDimensionality(axisDimensionality), // tensorShape[quantization_dim]
+ m_AxisFactor(axisFactor),
+ m_Index(0)
{}
- // This should be called to set index for per-axis Encoder/Decoder
- PerAxisIterator& SetIndex(unsigned int index, unsigned int axisIndex) override
+ PerAxisIterator(T* data = nullptr,
+ const armnn::TensorShape& tensorShape = TensorShape(),
+ const unsigned int axis = 0)
+ : m_Iterator(data),
+ m_Start(data),
+ m_AxisIndex(0),
+ m_Index(0)
{
- ARMNN_ASSERT(m_Iterator);
- m_Iterator = m_Start + index;
- m_AxisIndex = axisIndex;
- return *this;
+ m_AxisDimensionality = tensorShape[axis];
+ m_AxisFactor = armnnUtils::GetNumElementsAfter(tensorShape, axis);
}
void Reset(void* data) override
@@ -793,37 +782,50 @@ public:
m_Iterator = reinterpret_cast<T*>(data);
m_Start = m_Iterator;
m_AxisIndex = 0;
+ m_Index = 0;
}
PerAxisIterator& operator++() override
{
- ARMNN_ASSERT(m_Iterator);
- ++m_Iterator;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ ++m_Index;
+ this -> operator[](m_Index);
return *this;
}
PerAxisIterator& operator+=(const unsigned int increment) override
{
- ARMNN_ASSERT(m_Iterator);
- m_Iterator += increment;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ m_Index += increment;
+ this -> operator[](m_Index);
return *this;
}
PerAxisIterator& operator-=(const unsigned int decrement) override
{
- ARMNN_ASSERT(m_Iterator);
- m_Iterator -= decrement;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ m_Index -= decrement;
+ this -> operator[](m_Index);
return *this;
}
- PerAxisIterator& operator[](const unsigned int index) override
+
+ inline PerAxisIterator& SetIndexOnMem(const unsigned int index)
{
ARMNN_ASSERT(m_Iterator);
m_Iterator = m_Start + index;
- m_AxisIndex = static_cast<unsigned int>(*m_Iterator) % m_AxisFactor;
+ if (index < m_AxisFactor)
+ {
+ m_AxisIndex = 0;
+ }
+ else
+ {
+ m_AxisIndex = (index / m_AxisFactor) % m_AxisDimensionality;
+ }
+ m_Index = index;
+ return *this;
+ }
+
+ PerAxisIterator& operator[](const unsigned int index) override
+ {
+ SetIndexOnMem(index);
return *this;
}
@@ -831,18 +833,22 @@ public:
T* m_Iterator;
T* m_Start;
unsigned int m_AxisIndex;
+ unsigned int m_AxisDimensionality; // tensorShape[quantization_dim]
unsigned int m_AxisFactor;
+ unsigned int m_Index;
};
class QSymm8PerAxisDecoder : public PerAxisIterator<const int8_t, Decoder<float>>
{
public:
- QSymm8PerAxisDecoder(const int8_t* data, const std::vector<float>& scale, unsigned int axisFactor)
- : PerAxisIterator(data, axisFactor), m_Scales(scale) {}
+ QSymm8PerAxisDecoder(const int8_t* data, const armnn::TensorInfo& tensorInfo)
+ : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()),
+ m_Scales(tensorInfo.GetQuantizationScales())
+ {}
float Get() const override
{
- return armnn::Dequantize(*m_Iterator, m_Scales[m_AxisIndex], 0);
+ return armnn::Dequantize(*m_Iterator, GetScale(), 0);
}
// Get scale of the current value
@@ -852,37 +858,18 @@ public:
}
std::vector<float> DecodeTensor(const TensorShape &tensorShape,
- const unsigned int channelMultiplier,
bool isDepthwise) override
{
- const uint32_t size = tensorShape.GetNumElements();
- const uint32_t scaleSize = static_cast<uint32_t>(m_Scales.size());
-
- const uint32_t stepSize = isDepthwise ?
- tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0];
-
- const uint32_t stepNum = size / (stepSize * channelMultiplier);
- uint32_t scale;
+ IgnoreUnused(isDepthwise);
+ const unsigned int size = tensorShape.GetNumElements();
std::vector<float> decodedTensor;
decodedTensor.reserve(size);
- // channelMultiplier is only used in depthwise convolutions and in other cases will have no effect
- // stepSize is the length of a contiguous area sharing a quantization scale within a tensor
- // stepNum is the number of those steps/blocks in the tensor
- for (uint32_t mult = 0; mult < channelMultiplier; ++mult)
+ for (uint32_t i = 0; i < size; ++i)
{
- for (uint32_t step = 0; step < stepNum; ++step)
- {
- scale = (channelMultiplier * step + mult) % scaleSize;
- for (uint32_t i = 0; i < stepSize; ++i)
- {
- unsigned int index = mult * stepSize * channelMultiplier +
- step * stepSize + i;
- this->operator[](index);
- decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0));
- }
- }
+ SetIndexOnMem(i);
+ decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, GetScale(), 0));
}
return decodedTensor;
}
@@ -920,8 +907,10 @@ private:
class ScaledInt32PerAxisDecoder : public PerAxisIterator<const int32_t, Decoder<float>>
{
public:
- ScaledInt32PerAxisDecoder(const int32_t* data, const std::vector<float>& scales, unsigned int axisFactor)
- : PerAxisIterator(data, axisFactor), m_Scales(scales) {}
+ ScaledInt32PerAxisDecoder(const int32_t* data, const armnn::TensorInfo tensorInfo)
+ : PerAxisIterator(data, tensorInfo.GetShape(), tensorInfo.GetQuantizationDim().value()),
+ m_Scales(tensorInfo.GetQuantizationScales())
+ {}
float Get() const override
{
@@ -935,17 +924,14 @@ public:
}
std::vector<float> DecodeTensor(const TensorShape &tensorShape,
- const unsigned int channelMultiplier,
bool isDepthwise) override
{
const uint32_t size = tensorShape.GetNumElements();
- const uint32_t scaleSize = static_cast<uint32_t>(m_Scales.size());
const uint32_t stepSize = isDepthwise ?
tensorShape[2] * tensorShape[3] : tensorShape.GetNumElements() / tensorShape[0];
- const uint32_t stepNum = size / (stepSize * channelMultiplier);
- uint32_t scale;
+ const uint32_t stepNum = size / stepSize;
std::vector<float> decodedTensor;
decodedTensor.reserve(size);
@@ -953,18 +939,14 @@ public:
// channelMultiplier is only used in depthwise convolutions and in other cases will have no effect
// stepSize is the length of a contiguous area sharing a quantization scale within a tensor
// stepNum is the number of those steps/blocks in the tensor
- for (uint32_t mult = 0; mult < channelMultiplier; ++mult)
+ for (uint32_t step = 0; step < stepNum; ++step)
{
- for (uint32_t step = 0; step < stepNum; ++step)
+ //scale = (channelMultiplier * step + mult) % scaleSize;
+ for (uint32_t i = 0; i < stepSize; ++i)
{
- scale = (channelMultiplier * step + mult) % scaleSize;
- for (uint32_t i = 0; i < stepSize; ++i)
- {
- unsigned int index = mult * stepSize * channelMultiplier +
- step * stepSize + i;
- this->operator[](index);
- decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[scale], 0));
- }
+ unsigned int index = step * stepSize + i;
+ this->operator[](index);
+ decodedTensor.emplace_back(armnn::Dequantize(*m_Iterator, m_Scales[step], 0));
}
}
return decodedTensor;
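The reworked PerAxisIterator tracks a flat element index and derives the quantization-axis index from it in SetIndexOnMem. The same arithmetic can be reproduced in isolation; the helper names below are made up for this illustration, and NumElementsAfter mirrors what armnnUtils::GetNumElementsAfter is used for in the constructor above.

    // Standalone sketch of the axis-index arithmetic, not the Arm NN class itself.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Product of the dimensions after 'axis' (the iterator's m_AxisFactor).
    uint32_t NumElementsAfter(const std::vector<uint32_t>& shape, uint32_t axis)
    {
        uint32_t count = 1;
        for (std::size_t d = axis + 1; d < shape.size(); ++d)
        {
            count *= shape[d];
        }
        return count;
    }

    // Scale index for a flat memory index: (index / axisFactor) % shape[axis].
    uint32_t AxisIndexOf(uint32_t flatIndex, const std::vector<uint32_t>& shape, uint32_t axis)
    {
        return (flatIndex / NumElementsAfter(shape, axis)) % shape[axis];
    }

    int main()
    {
        // Depthwise weights [1, H, W, O] quantized along axis 3: axisFactor == 1,
        // so the scale index is simply flatIndex % O.
        const std::vector<uint32_t> dwShape = {1, 3, 3, 8};
        assert(AxisIndexOf(10, dwShape, 3) == 2);

        // Conv2d weights [O, H, W, I] quantized along axis 0: axisFactor == H*W*I,
        // so the scale index advances once per output channel.
        const std::vector<uint32_t> convShape = {4, 3, 3, 2};
        assert(AxisIndexOf(19, convShape, 0) == 1);
        return 0;
    }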
diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp
index d7845535df..e1bbc6bc52 100644
--- a/src/backends/reference/workloads/ConvImpl.cpp
+++ b/src/backends/reference/workloads/ConvImpl.cpp
@@ -95,9 +95,12 @@ void Convolve(const TensorShape& rInputShape,
const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
- const unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
- const unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
- const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];
+ // Weights layout:
+ // Conv2d: [O,H,W,I]
+ // Depthwise: [1,H,W,O]
+ const unsigned int inputChannels = rInputShape[channelsIndex];
+ const unsigned int outputChannels = rOutputShape[channelsIndex];
+ const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
const unsigned int batchSize = rOutputShape[0];
const unsigned int outputHeight = rOutputShape[heightIndex];
@@ -105,16 +108,15 @@ void Convolve(const TensorShape& rInputShape,
const unsigned int inputHeight = rInputShape[heightIndex];
const unsigned int inputWidth = rInputShape[widthIndex];
- const unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
- const unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];
+ const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
+ const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
- const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthMultiplier, depthwise);
+ const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
const TensorShape biasShape{outputChannels};
const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
- unsigned int depthwiseMultiplierIdx = 0;
for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
{
for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
@@ -130,13 +132,6 @@ void Convolve(const TensorShape& rInputShape,
// For normal, must loop over each input channel.
for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
{
- if (depthwise)
- {
- depthwiseMultiplierIdx = 0;
- cInput = cOutput / depthMultiplier;
- depthwiseMultiplierIdx = cOutput % depthMultiplier;
- }
-
for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
{
for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
@@ -147,10 +142,10 @@ void Convolve(const TensorShape& rInputShape,
// Since dimensionality of kernel depends on depthwiseness, so does index.
if (depthwise)
{
- filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
- cInput * filterWidth * filterHeight +
- yFilter * filterWidth +
- xFilter;
+ cInput = cOutput / depthMultiplier;
+ // filterDepth = outputChannels;
+ filterIndex = xFilter * outputChannels + cOutput +
+ yFilter * filterWidth * outputChannels;
}
else
{
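With the depthwise weights now stored as [1, filterHeight, filterWidth, outputChannels] (outputChannels == inputChannels * depthMultiplier), the filter tap for output channel cOutput at (yFilter, xFilter) sits at (yFilter * filterWidth + xFilter) * outputChannels + cOutput, which is what the depthwise branch above computes. A standalone sketch, with a hypothetical helper name and made-up sizes:

    // Illustrative only; the real loop in ConvImpl.cpp computes this inline.
    #include <cassert>

    unsigned int DepthwiseFilterIndex(unsigned int yFilter, unsigned int xFilter,
                                      unsigned int cOutput,
                                      unsigned int filterWidth, unsigned int outputChannels)
    {
        return (yFilter * filterWidth + xFilter) * outputChannels + cOutput;
    }

    int main()
    {
        // 3x3 kernel, 4 input channels, depth multiplier 2 -> 8 output channels.
        const unsigned int filterWidth = 3;
        const unsigned int outputChannels = 8;
        const unsigned int depthMultiplier = 2;

        const unsigned int cOutput = 5;
        const unsigned int cInput = cOutput / depthMultiplier; // input channel feeding this output
        assert(cInput == 2);
        assert(DepthwiseFilterIndex(1, 2, cOutput, filterWidth, outputChannels) == 45);
        return 0;
    }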
diff --git a/src/backends/reference/workloads/Decoders.hpp b/src/backends/reference/workloads/Decoders.hpp
index 0b3f36047d..cd0dc5d40f 100644
--- a/src/backends/reference/workloads/Decoders.hpp
+++ b/src/backends/reference/workloads/Decoders.hpp
@@ -20,11 +20,7 @@ namespace
inline std::unique_ptr<Decoder<float>> MakeSigned32PerAxisDecoder(const TensorInfo& info, const void* data)
{
- auto params = armnnUtils::GetPerAxisParams(info);
- return std::make_unique<ScaledInt32PerAxisDecoder>(
- static_cast<const int32_t*>(data),
- params.second,
- params.first);
+ return std::make_unique<ScaledInt32PerAxisDecoder>(static_cast<const int32_t*>(data), info);
}
inline std::unique_ptr<Decoder<float>> MakeSigned32Decoder(const TensorInfo& info, const void* data)
@@ -75,10 +71,7 @@ inline std::unique_ptr<Decoder<float>> MakeDecoder(const TensorInfo& info, const
case armnn::DataType::QuantizedSymm8PerAxis:
{
std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
- return std::make_unique<QSymm8PerAxisDecoder>(
- static_cast<const int8_t*>(data),
- params.second,
- params.first);
+ return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
}
ARMNN_NO_DEPRECATE_WARN_END
case DataType::QAsymmS8:
@@ -123,10 +116,7 @@ inline std::unique_ptr<Decoder<float>> MakeDecoder(const TensorInfo& info, const
if (info.HasPerAxisQuantization())
{
std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
- return std::make_unique<QSymm8PerAxisDecoder>(
- static_cast<const int8_t*>(data),
- params.second,
- params.first);
+ return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
}
else
{
diff --git a/src/backends/reference/workloads/TransposeConvolution2d.cpp b/src/backends/reference/workloads/TransposeConvolution2d.cpp
index 7408e92982..a1a6cbae68 100644
--- a/src/backends/reference/workloads/TransposeConvolution2d.cpp
+++ b/src/backends/reference/workloads/TransposeConvolution2d.cpp
@@ -137,7 +137,7 @@ void TransposeConvolution2dImpl(const TransposeConvolution2dDescriptor& descript
{
for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
{
- rBiasesDecoder.SetIndex(dOutput, dOutput);
+ rBiasesDecoder[dOutput];
for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
{
for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)