aboutsummaryrefslogtreecommitdiff
path: root/src/backends/backendsCommon
diff options
context:
space:
mode:
authorJan Eilers <jan.eilers@arm.com>2021-06-02 12:01:25 +0100
committerJan Eilers <jan.eilers@arm.com>2021-06-16 11:31:42 +0000
commit53ef79504b4c881c572735393c2eede5fa556c46 (patch)
treef6e0cd27c4d03075fa154074c5b12d7c8c3149f7 /src/backends/backendsCommon
parent77fe76bfa8cb798943821d1f3e432c228e1cdee3 (diff)
downloadarmnn-53ef79504b4c881c572735393c2eede5fa556c46.tar.gz
IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]
* This change is necessary because tflite uses a [1,H,W,I*M] format and uses the I*M dimension for per axis quantization. Our previous layout [M,I,H,W] can't handle the correlating quantization scales.
* Updates Onnx-, TfLiteParser and TfliteDelegate
* Updates the CpuRef, CpuAcc and GpuAcc backends
* Adjusts unit tests
* Adds test to ensure models with old layout can still be read and executed
* Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W] which can be used by backend developers

!android-nn-driver:5553

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
Diffstat (limited to 'src/backends/backendsCommon')
-rw-r--r--src/backends/backendsCommon/WorkloadData.cpp38
-rw-r--r--src/backends/backendsCommon/WorkloadData.hpp14
-rw-r--r--src/backends/backendsCommon/WorkloadUtils.cpp94
-rw-r--r--src/backends/backendsCommon/WorkloadUtils.hpp34
-rw-r--r--src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp194
5 files changed, 235 insertions, 139 deletions
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index be0ac707a8..44a6a17b37 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -390,13 +390,6 @@ void ValidatePerAxisQuantizationDimension(const TensorInfo& tensorInfo,
throw InvalidArgumentException(fmt::format("{0}: Quantization dimension for per-axis quantization "
"not set on tensor {1}.", descName, tensorName));
}
-
- if (quantizationDim.value() != 0)
- {
- throw InvalidArgumentException(fmt::format(
- "{0}: Quantization dimension for per-axis quantization expected to be 0 on tensor {1}, "
- "but got: {2}", descName, tensorName, quantizationDim.value()));
- }
}
void ValidatePerAxisQuantizationOffset(const TensorInfo& tensorInfo,
@@ -1386,17 +1379,32 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
const unsigned int channelIndex = (m_Parameters.m_DataLayout == DataLayout::NCHW) ? 1 : 3;
- // Expected weight shape: [ M, I, H, W ] - This shape does NOT depend on the data layout
+ // Expected weight shape: [ 1, H, W, I*M ] - This shape does NOT depend on the data layout
// inputChannels * channelMultiplier should be equal to outputChannels.
- const unsigned int numWeightChannelMultiplier = weightTensorInfo.GetShape()[0];
- const unsigned int numWeightInputChannels = weightTensorInfo.GetShape()[1];
- const unsigned int numWeightOutputChannels = outputTensorInfo.GetShape()[channelIndex];
- if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels)
+ const unsigned int numWeightOutputChannels = weightTensorInfo.GetShape()[3]; // I*M=Cout
+ const unsigned int numOutputChannels = outputTensorInfo.GetShape()[channelIndex];
+ if (numWeightOutputChannels != numOutputChannels)
+ {
+ throw InvalidArgumentException(fmt::format(
+ "{0}: The weight format in armnn is expected to be [1, H, W, Cout]."
+ "But 4th dimension is not equal to Cout. Cout = {1} Provided weight shape: [{2}, {3}, {4}, {5}]",
+ descriptorName,
+ numOutputChannels,
+ weightTensorInfo.GetShape()[0],
+ weightTensorInfo.GetShape()[1],
+ weightTensorInfo.GetShape()[2],
+ weightTensorInfo.GetShape()[3]));
+ }
+ if (weightTensorInfo.GetShape()[0] != 1)
{
throw InvalidArgumentException(fmt::format(
- "{0}: output_channels (provided {1}) should be equal to input_channels (provided {2}) "
- "multiplied by channel_multiplier (provided {3}).",
- descriptorName, numWeightOutputChannels, numWeightInputChannels, numWeightChannelMultiplier));
+ "{0}: The weight format in armnn is expected to be [1, H, W, Cout]."
+ "But first dimension is not equal to 1. Provided weight shape: [{1}, {2}, {3}, {4}]",
+ descriptorName,
+ weightTensorInfo.GetShape()[0],
+ weightTensorInfo.GetShape()[1],
+ weightTensorInfo.GetShape()[2],
+ weightTensorInfo.GetShape()[3]));
}
ValidateWeightDataType(inputTensorInfo, weightTensorInfo, descriptorName);
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index 77d4209657..11ce2cb44f 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -208,7 +208,19 @@ struct Convolution2dQueueDescriptor : QueueDescriptorWithParameters<Convolution2
void Validate(const WorkloadInfo& workloadInfo) const;
};
-// Depthwise Convolution 2D layer workload data.
+/// Depthwise Convolution 2D layer workload data.
+///
+/// @note
+/// The weights are in the format [1, H, W, I*M], where I is the input channel size, M the depthwise multiplier and
+/// H, W is the height and width of the filter kernel. If per channel quantization is applied
+/// the weights will be quantized along the last dimension/axis (I*M) which corresponds to the output channel size.
+/// If per channel quantization is applied the weights tensor will have I*M scales, one for each element
+/// along the quantization axis. You have to be aware of this when reshaping the weights tensor.
+/// Splitting the I*M axis, e.g. [1, H, W, I*M] --> [H, W, I, M], won't work without taking care of the
+/// corresponding quantization scales.
+/// If there is no per channel quantization applied reshaping the weights tensor won't cause any issues.
+/// Preconfigured permutation functions are available in WorkloadUtils.hpp.
+///
struct DepthwiseConvolution2dQueueDescriptor : QueueDescriptorWithParameters<DepthwiseConvolution2dDescriptor>
{
DepthwiseConvolution2dQueueDescriptor()
diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp
index c8105aea04..bd7f09b28a 100644
--- a/src/backends/backendsCommon/WorkloadUtils.cpp
+++ b/src/backends/backendsCommon/WorkloadUtils.cpp
@@ -7,6 +7,9 @@
#include <armnn/Utils.hpp>
#include <armnn/utility/NumericCast.hpp>
+#include <armnnUtils/DataLayoutIndexed.hpp>
+
+#include <fmt/format.h>
namespace armnn
{
@@ -107,6 +110,7 @@ ConstTensor ReorderWeightChannelsForAcl(const ConstTensor& weightHandle, DataLay
return ConstTensor(weightHandle.GetInfo(), permuteBuffer);
}
+
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout)
{
// Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
@@ -130,6 +134,96 @@ TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, D
return weightPermutedInfo;
}
+
+std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor, // depthwise weights, indexed here as [1,H,W,I*M]
+ const TensorInfo& inputInfo, // input tensor info; only its channel dimension is read
+ const DataLayout dataLayout, // NHWC or NCHW; any other value throws
+ void* permuteBuffer) // forwarded unchanged to PermuteTensor
+{ // Returns the (possibly permuted) weights together with the depth multiplier M = (I*M) / I.
+ TensorInfo weightsInfo = weightTensor->GetTensorInfo(); // local copy, used only to read the shape
+ unsigned int depthMultiplier = 1;
+ PermutationVector permutationVector{}; // stays empty for NHWC
+ if (dataLayout == armnn::DataLayout::NHWC)
+ {
+ // No permutation required. Data layouts are the same.
+ // Input channels are dimension 3 in NHWC; unsigned division truncates if I does not divide I*M.
+ depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[3];
+ }
+ else if (dataLayout == armnn::DataLayout::NCHW)
+ {
+ // Permute [ 1, H, W, I*M ] --> [ 1, I*M, H, W ]
+ depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[1]; // input channels are dimension 1 in NCHW
+ permutationVector = { 0, 2, 3, 1 }; // NOTE(review): reads as source dim i -> destination dim vector[i]; confirm against PermutationVector
+ }
+ else
+ {
+ throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
+ GetDataLayoutName(dataLayout)));
+ }
+ // NOTE(review): for NHWC the vector is empty; presumably PermuteTensor treats that as "no permutation" - confirm.
+ ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
+ // Hand back both results as one tuple: (weights, depth multiplier).
+ return std::make_tuple(weightsPermuted, depthMultiplier);
+}
+
+std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo, // weights info, indexed here as [1,H,W,I*M]
+ const TensorInfo& inputInfo, // input tensor info; only its channel dimension is read
+ const DataLayout dataLayout) // NHWC or NCHW; any other value throws
+{ // Works on TensorInfo only: returns the weights info for the given layout plus the depth multiplier M = (I*M) / I.
+ unsigned int aclDepthMultiplier = 1;
+ TensorInfo weightsPermuted;
+ if (dataLayout == armnn::DataLayout::NHWC)
+ {
+ // No permutation required. Data layouts are the same.
+ aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[3]; // input channels are dimension 3 in NHWC
+ weightsPermuted = weightInfo; // returned as an unmodified copy
+ }
+ else if (dataLayout == armnn::DataLayout::NCHW)
+ {
+ // Permute [ 1, H, W, I*M ] --> [ 1, I*M, H, W ]
+ aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[1]; // input channels are dimension 1 in NCHW
+ PermutationVector permutationVector{ 0, 2, 3, 1 }; // NOTE(review): reads as source dim i -> destination dim vector[i]; confirm
+ weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector);
+ }
+ else
+ {
+ throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}",
+ GetDataLayoutName(dataLayout)));
+ }
+ // Unsigned division above truncates if I does not divide I*M; no divisibility check is made here.
+ return std::make_tuple(weightsPermuted, aclDepthMultiplier);
+}
+
+
+std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor, // depthwise weights, indexed here as [1,H,W,I*M]
+ const TensorInfo& inputInfo, // input tensor info; only its channel dimension is read
+ const DataLayout& dataLayout, // used to locate the channel dimension of inputInfo
+ void* permuteBuffer) // forwarded unchanged to PermuteTensor
+{ // Converts weights towards the previous [M,I,H,W] layout and returns them with the depth multiplier M.
+ TensorInfo weightsInfo = weightTensor->GetTensorInfo(); // local copy; edits below do not touch weightTensor
+ // Per-axis quantized weights are rejected up front with an InvalidArgumentException.
+ if (weightsInfo.HasPerAxisQuantization())
+ {
+ throw InvalidArgumentException("Can't convert tensor from [1,H,W,Cout] to [M,Cin,H,W] when per channel "
+ "quantization is applied.");
+ }
+ // NOTE(review): the reshape below is applied to the local weightsInfo copy only.
+ // Reshape weights [ 1, H, W, I*M ] --> [ H, W, I, M ]
+ auto weightsShape = weightsInfo.GetShape();
+ auto channelIndex = armnnUtils::DataLayoutIndexed(dataLayout).GetChannelsIndex();
+ unsigned int depthMultiplier = weightsShape[3] / inputInfo.GetShape()[channelIndex]; // M = (I*M) / I; unsigned division truncates
+ weightsInfo.SetShape({ weightsShape[1],
+ weightsShape[2],
+ inputInfo.GetShape()[channelIndex],
+ depthMultiplier});
+ // NOTE(review): PermuteTensor below receives weightTensor, not the reshaped weightsInfo - confirm the reshape takes effect.
+ // Permute [ H, W, I, M ] --> [ M, I, H, W ]
+ PermutationVector permutationVector = { 2, 3, 1, 0 };
+ ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
+ // Hand back both results as one tuple: (weights, depth multiplier).
+ return std::make_tuple(weightsPermuted, depthMultiplier);
+}
+
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
DataLayout dataLayout,
void* permuteBuffer)
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index 06d2eccf3e..d2f9ca5862 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -214,8 +214,42 @@ void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);
+/// Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M].
+/// This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC)
+/// as required by the compute library.
+/// Returns a tuple of the converted weights tensor info and the depth multiplier.
+std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo,
+ const TensorInfo& inputInfo,
+ const DataLayout dataLayout);
+
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
DataLayout dataLayout,
void* permuteBuffer);
+/// Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M].
+/// This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or
+/// keeps it at [1,H,W,I*M] (if NHWC) as required by the compute library.
+///
+/// \param weightTensor - ConstTensorHandle of weights tensor
+/// \param inputInfo - TensorInfo of input tensor
+/// \param dataLayout - DataLayout of the input tensor
+/// \param permuteBuffer - Pointer to memory with the size of tensor. Used for the permutation
+/// \return tuple of transformed weights-ConstTensor and depthwise multiplier
+std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor,
+ const TensorInfo& inputInfo,
+ const DataLayout dataLayout,
+ void* permuteBuffer);
+
+/// Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].
+/// Throws an InvalidArgumentException if the weights use per-axis (per channel) quantization.
+/// \param weightTensor - ConstTensorHandle of the weight tensor that should be converted
+/// \param inputInfo - TensorInfo of the corresponding input tensor
+/// \param dataLayout - DataLayout of the input tensor e.g. NHWC or NCHW
+/// \param permuteBuffer - Memory location with the same size as the weight tensor to write converted data to
+/// \return - A tuple of ConstTensor and unsigned int which is the converted weightTensor and the depthMultiplier
+std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor,
+ const TensorInfo& inputInfo,
+ const DataLayout& dataLayout,
+ void* permuteBuffer);
+
} //namespace armnn
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
index 98264ee928..99f1436c98 100644
--- a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
@@ -1659,10 +1659,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
unsigned int inputChannels = armnn::numeric_cast<unsigned int>(inputShape[1]);
unsigned int inputHeight = armnn::numeric_cast<unsigned int>(inputShape[2]);
unsigned int inputWidth = armnn::numeric_cast<unsigned int>(inputShape[3]);
- unsigned int kernelChanMul = armnn::numeric_cast<unsigned int>(kernelShape[0]);
- unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[1]);
- unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(kernelShape[2]);
- unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(kernelShape[3]);
+ unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(kernelShape[1]);
+ unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(kernelShape[2]);
+ unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[3]);
unsigned int outputNum = armnn::numeric_cast<unsigned int>(outputExpectedShape[0]);
unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpectedShape[1]);
unsigned int outputHeight = armnn::numeric_cast<unsigned int>(outputExpectedShape[2]);
@@ -1677,7 +1676,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
armnn::TensorInfo outputTensorInfo =
armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
- armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
+ armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
// Set quantization parameters if the requested type is a quantized type.
@@ -1792,19 +1791,17 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
unsigned int kernelHeight = 3;
unsigned int kernelWidth = 3;
- unsigned int kernelChannels = inputChannels;
- unsigned int kernelDepthMultiplier = 1;
unsigned int outputHeight = 1;
unsigned int outputWidth = 1;
- unsigned int outputChannels = kernelChannels;
+ unsigned int outputChannels = inputChannels;
unsigned int outputNum = inputNum;
armnn::TensorInfo inputTensorInfo =
armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
armnn::TensorInfo outputTensorInfo =
armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
- armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth},
+ armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels},
ArmnnType);
armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
@@ -1955,7 +1952,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
- armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth},
+ armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels},
ArmnnType);
armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
@@ -2040,33 +2037,18 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
// Manually calculated.
std::vector<T> originalOutputImage = std::vector<T>(
QuantizedVector<T>({
- 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f,
- 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f,
- 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
- 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
- 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f,
- 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f,
-
- -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
- -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
- -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
- -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f,
-
- 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
-
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 5, 5, 5, 5, 5, 5, 5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5,
+ 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5, 5, 5, 5, 5, 5, 5,
+ 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
+ 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 1, 3, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0,
+ 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0,
+ 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0,
+ 2, 4, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0,
+ 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0,
+ 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0
},
outputTensorInfo.GetQuantizationScale(),
outputTensorInfo.GetQuantizationOffset()));
@@ -2170,10 +2152,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[1]);
unsigned int outputNum = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[0]);
- unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(originalKernelShape[2]);
- unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(originalKernelShape[3]);
- unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernelShape[1]);
- unsigned int kernelDepthMul = armnn::numeric_cast<unsigned int>(originalKernelShape[0]);
+ unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(originalKernelShape[1]);
+ unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(originalKernelShape[2]);
+ unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernelShape[3]);
bool biasEnabled = bias.size() > 0;
@@ -2192,7 +2173,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
// Kernel must be NCHW layout always, independently of the layout of the input and output for depthwise convolution.
- armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType);
+ armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
@@ -2332,9 +2313,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
inputTensorInfo.GetQuantizationOffset());
// Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
- armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
- auto kernel = QuantizedVector<T>(
- {
+ // Weights layout for depthwise: [1,H,W,I*M]
+ armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType);
+ auto kernel = QuantizedVector<T>({
32, 31, 30, 29,
28, 27, 26, 25,
24, 23, 22, 21,
@@ -2353,17 +2334,10 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
auto expectedOutput = QuantizedVector<T>(
{
- 1062, 1580, 1850, 1530, 1117,
- 2140, 3108, 3500, 2842, 2042,
- 3580, 5068, 5460, 4342, 3062,
- 3618, 5072, 5390, 4248, 2971,
- 3074, 4282, 4510, 3533, 2457,
-
- 1550, 2284, 2362, 1955, 1428,
- 2910, 4206, 4342, 3528, 2536,
- 3390, 4886, 5022, 4068, 2916,
- 3566, 5056, 5182, 4133, 2922,
- 3100, 4352, 4452, 3517, 2465
+ 396, 664, 820, 756, 602, 1016, 1608, 1880, 1652, 1268, 1976, 2968, 3240, 2732,
+ 2028, 2628, 3808, 4060, 3312, 2390, 2596, 3700, 3900, 3130, 2226, 2817, 4186,
+ 4330, 3609, 2651, 5414, 7864, 8120, 6626, 4780, 6314, 9144, 9400, 7646, 5500,
+ 6759, 9610, 9850, 7875, 5579, 5935, 8348, 8540, 6757, 4742
},
outputTensorInfo.GetQuantizationScale(),
outputTensorInfo.GetQuantizationOffset());
@@ -2420,9 +2394,8 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
inputTensorInfo.GetQuantizationScale(),
inputTensorInfo.GetQuantizationOffset());
- armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
- auto kernel = QuantizedVector<T>(
- {
+ armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType);
+ auto kernel = QuantizedVector<T>({
32, 31, 30, 29,
28, 27, 26, 25,
24, 23, 22, 21,
@@ -2439,17 +2412,17 @@ LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
auto expectedOutput = QuantizedVector<T>(
{
- 1062, 1580, 1850, 1530, 1117,
- 2140, 3108, 3500, 2842, 2042,
- 3580, 5068, 5460, 4342, 3062,
- 3618, 5072, 5390, 4248, 2971,
- 3074, 4282, 4510, 3533, 2457,
-
- 1550, 2284, 2362, 1955, 1428,
- 2910, 4206, 4342, 3528, 2536,
- 3390, 4886, 5022, 4068, 2916,
- 3566, 5056, 5182, 4133, 2922,
- 3100, 4352, 4452, 3517, 2465
+ 396,664,820,756,602,
+ 1016,1608,1880,1652,1268,
+ 1976,2968,3240,2732,2028,
+ 2628,3808,4060,3312,2390,
+ 2596,3700,3900,3130,2226,
+
+ 2817,4186,4330,3609,2651,
+ 5414,7864,8120,6626,4780,
+ 6314,9144,9400,7646,5500,
+ 6759,9610,9850,7875,5579,
+ 5935,8348,8540,6757,4742
},
outputTensorInfo.GetQuantizationScale(),
outputTensorInfo.GetQuantizationOffset());
@@ -2504,9 +2477,8 @@ LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
inputTensorInfo.GetQuantizationScale(),
inputTensorInfo.GetQuantizationOffset());
- armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3 }, ArmnnType);
- auto kernel = QuantizedVector<T>(
- {
+ armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType);
+ auto kernel = QuantizedVector<T>({
1, 2, 3,
4, 5, 6,
7, 8, 9
@@ -2671,7 +2643,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
- armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
+ armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType);
std::vector<float> kernelNoQuantizedValues =
{
1, 2, 3,
@@ -2740,7 +2712,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
- armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
+ armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 2}, ArmnnType);
std::vector<float> kernelNoQuantizedValues =
{
1, 2, 3,
@@ -2757,15 +2729,9 @@ LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
std::vector<float> outputExpectedNoQuantizedValues =
{
- 6., 5., 5., 5.,
- 6., 5., 5., 5.,
- 6., 5., 5., 5.,
- 3., 2., 2., 2.,
+ 2, 9, 9, 9, 2, 9, 9, 9, 2, 9, 9, 9, 5, 3, 3, 3, 3,
- 6., 5., 5., 5.,
- 6., 5., 5., 5.,
- 6., 5., 5., 5.,
- 3., 2., 2., 2.
+ 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 6, 4, 4, 4
};
return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
@@ -2804,7 +2770,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
27.0, 28.0, 29.0
};
- armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
+ armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 8}, ArmnnType);
std::vector<float> kernelNoQuantizedValues =
{
@@ -2836,29 +2802,10 @@ LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
std::vector<float> outputExpectedNoQuantizedValues =
{
- 10.f, 10.f,
- 10.f, 10.f,
-
- 1.f, 1.f,
- 1.f, 1.f,
-
- 2.f, 2.f,
- 2.f, 2.f,
-
- 3.f, 3.f,
- 3.f, 3.f,
-
- 23.f, 24.f,
- 26.f, 27.f,
-
- 2.5f, 2.6000001f,
- 2.8f, 2.9f,
-
- 4.2000003f, 4.4f,
- 4.8f, 5.f,
-
- 6.6000004f, 6.9f,
- 7.5000005f, 7.8f
+ 4.5f, 4.5f, 4.5f, 4.5f, 5.5f, 5.5f, 5.5f, 5.5f,
+ 2.5f, 2.5f, 2.5f, 2.5f, 3.5f, 3.5f, 3.5f, 3.5f,
+ 10.05f, 10.5f, 11.4f, 11.85f, 12.75f, 13.3f, 14.4f, 14.95f,
+ 5.25f, 5.5f, 6.0f, 6.25f, 7.45f, 7.8f, 8.5f, 8.85f
};
@@ -2898,7 +2845,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
27.0, 28.0, 29.0
};
- armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
+ armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 4}, ArmnnType);
std::vector<float> kernelNoQuantizedValues =
{
@@ -2919,17 +2866,10 @@ LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
std::vector<float> outputExpectedNoQuantizedValues =
{
- 10.f, 10.f,
- 10.f, 10.f,
-
- 1.f, 1.f,
- 1.f, 1.f,
-
- 4.2000003f, 4.4f,
- 4.8f, 5.f,
-
- 6.6000004f, 6.9f,
- 7.5000005f, 7.8f
+ 4.5f, 4.5f, 4.5f, 4.5f,
+ 5.5f, 5.5f, 5.5f, 5.5f,
+ 5.25f, 5.5f, 6.0f, 6.25f,
+ 7.65f, 8.0f, 8.7f, 9.05f
};
@@ -2984,7 +2924,7 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
std::vector<unsigned int> inputShape;
std::vector<unsigned int> outputShape;
- std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth };
+ std::vector<unsigned int> kernelShape{ 1, kernelHeight, kernelWidth, outputChannels };
std::vector<unsigned int> biasShape{ outputChannels };
switch (layout.GetDataLayout())
{
@@ -3609,6 +3549,14 @@ LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
}
armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
+ // permute from [O,1,H,W] --> [1,H,W,O]
+ armnn::PermutationVector permutationVector {3,0,1,2};
+ kernelTensorInfo = armnnUtils::Permuted(kernelTensorInfo, permutationVector);
+ std::vector<float> kernelPermuted(kernelTensorInfo.GetNumElements());
+ armnnUtils::Permute(kernelTensorInfo.GetShape(), permutationVector,
+ kernelData.data(), kernelPermuted.data(),
+ GetDataTypeSize(kernelTensorInfo.GetDataType()));
+
std::vector<float> expectedOutputData(64, 0.f);
armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
@@ -3617,7 +3565,7 @@ LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
memoryManager,
tensorHandleFactory,
input,
- kernelData,
+ kernelPermuted,
std::vector<float>(),
expectedOutputData,
inputTensorInfo.GetShape(),
@@ -3713,8 +3661,8 @@ LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
- const unsigned int quantDimension = 0;
- TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
+ const unsigned int quantDimension = 3;
+ TensorInfo kernelInfo({ 1, 2, 2, 4 }, kernelType, quantScales, quantDimension); // [1, H, W, I*M]
const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
constexpr unsigned int biasQuantDimension = 0;