From 7612bd6cc385dfbf54f831a6349f3a9363c6d0a2 Mon Sep 17 00:00:00 2001 From: Jan Eilers Date: Tue, 6 Apr 2021 17:29:03 +0100 Subject: IVGCVSW-5842 Remove cross-wiring in depthwise * Reading tensor infos won't allow a permutation vector anymore. The permutation only changed the quantization dimension not the shape and was therefore misleading * The permutation of the full tensor info is now performed in armnnUtils::Permuted * Changed TfLite Parser depthwise parsing function * Added unit tests to TfLite Parser with more random data * Changed TfLite Delegate depthwise parsing function * Added unit test to the delegate with per channel quantization !android-nn-driver:5412 Signed-off-by: Jan Eilers Change-Id: I1f985ee69547bcaf16a72201e00a6b6fe1ef9a97 --- delegate/src/Convolution.hpp | 2 +- delegate/src/DelegateUtils.hpp | 13 +- delegate/src/test/Convolution2dTest.cpp | 6 +- delegate/src/test/ConvolutionTestHelper.hpp | 50 ++- delegate/src/test/DepthwiseConvolution2dTest.cpp | 114 +++++- include/armnnUtils/Permute.hpp | 3 +- src/armnn/test/UtilsTests.cpp | 16 +- src/armnnTfLiteParser/TfLiteParser.cpp | 14 +- .../test/DepthwiseConvolution2D.cpp | 424 ++++++++++++++++++++- src/armnnUtils/Permute.cpp | 8 +- 10 files changed, 582 insertions(+), 68 deletions(-) diff --git a/delegate/src/Convolution.hpp b/delegate/src/Convolution.hpp index 153f44953c..6566ffff44 100644 --- a/delegate/src/Convolution.hpp +++ b/delegate/src/Convolution.hpp @@ -291,7 +291,7 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData, // Mappings from TensorflowLite filter tensors to the ArmNN filter tensors (ArmNN weights have to be [M, I, H, W]) armnn::PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W] - armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor, permutationVector); + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor); // Assuming input is NHWC unsigned int inputHeight = inputTensorInfo.GetShape()[1]; diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp index deed61dc5f..76d21f6332 100644 --- a/delegate/src/DelegateUtils.hpp +++ b/delegate/src/DelegateUtils.hpp @@ -398,8 +398,7 @@ armnn::DataType GetDataType(const TfLiteTensor& tfLiteTensor) } } -armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor, - const armnn::PermutationVector& dimensionMappings = {0, 1, 2, 3}) +armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor) { armnn::DataType type = GetDataType(tfLiteTensor); armnn::TensorInfo ret; @@ -453,8 +452,7 @@ armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor, quantizationScales.push_back(affineQuantization->scale->data[i]); } ret.SetQuantizationScales(quantizationScales); - ret.SetQuantizationDim(dimensionMappings[armnn::numeric_cast( - affineQuantization->quantized_dimension)]); + ret.SetQuantizationDim(armnn::numeric_cast(affineQuantization->quantized_dimension)); } else { @@ -485,13 +483,16 @@ armnn::ConstTensor CreateConstTensor(const TfLiteTensor* tfLiteTensor, if (permutationVector.has_value() && permutationVector.value().GetSize() > 0 && permutationData != nullptr) { - armnnUtils::Permute(armnnUtils::Permuted(tensorInfo.GetShape(), permutationVector.value()), + // Permute tensor info + tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector.value()); + // then permute data using the shape from permuted tensor info + armnnUtils::Permute(tensorInfo.GetShape(), 
permutationVector.value(), tfLiteTensor->data.data, permutationData, armnn::GetDataTypeSize(tensorInfo.GetDataType())); - return armnn::ConstTensor(armnnUtils::Permuted(tensorInfo, permutationVector.value()), permutationData); + return armnn::ConstTensor(tensorInfo, permutationData); } else { diff --git a/delegate/src/test/Convolution2dTest.cpp b/delegate/src/test/Convolution2dTest.cpp index 2ce2944a79..6f498ce22e 100644 --- a/delegate/src/test/Convolution2dTest.cpp +++ b/delegate/src/test/Convolution2dTest.cpp @@ -166,8 +166,10 @@ void Conv2DWithBiasesReluUint8Test(std::vector& backends) expectedOutputValues, biasShape, biasValues, - 1, // filter scale - 4, // filter offset + {1.0f}, // biasScale + {0}, // biasOffset + {1.0f}, // filterScale + {4}, // filterOffsets 2, // output scale 20); // output offset } diff --git a/delegate/src/test/ConvolutionTestHelper.hpp b/delegate/src/test/ConvolutionTestHelper.hpp index b2a3c889e6..1b33c1d74d 100644 --- a/delegate/src/test/ConvolutionTestHelper.hpp +++ b/delegate/src/test/ConvolutionTestHelper.hpp @@ -34,13 +34,16 @@ std::vector CreateConv2dTfLiteModel(tflite::BuiltinOperator convolutionOpe const std::vector & outputTensorShape, const std::vector & filterData, const std::vector & biasData, - float filterScale = 1.0f, - int filterOffset = 0, + const std::vector biasScales = {1.0f}, + const std::vector biasOffsets = {0}, + const std::vector filterScales = {1.0f}, + const std::vector filterOffsets = {0}, float outputQuantScale = 2.0f, int outputQuantOffset = 0, float quantScale = 1.0f, int quantOffset = 0, - int32_t depth_multiplier = 1) + int32_t depth_multiplier = 1, + int32_t filterQuantizationDim = 0) { using namespace tflite; flatbuffers::FlatBufferBuilder flatBufferBuilder; @@ -67,12 +70,23 @@ std::vector CreateConv2dTfLiteModel(tflite::BuiltinOperator convolutionOpe 0, flatBufferBuilder.CreateVector({ outputQuantScale }), flatBufferBuilder.CreateVector({ outputQuantOffset })); + auto filterQuantizationParameters = - CreateQuantizationParameters(flatBufferBuilder, - 0, - 0, - flatBufferBuilder.CreateVector({ filterScale }), - flatBufferBuilder.CreateVector({ filterOffset })); + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector(filterScales), + flatBufferBuilder.CreateVector(filterOffsets), + tflite::QuantizationDetails_NONE, + 0, + filterQuantizationDim); + + auto biasQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector(biasScales), + flatBufferBuilder.CreateVector(biasOffsets)); std::array, 4> tensors; tensors[0] = CreateTensor(flatBufferBuilder, @@ -100,7 +114,7 @@ std::vector CreateConv2dTfLiteModel(tflite::BuiltinOperator convolutionOpe biasTensorType, 2, flatBufferBuilder.CreateString("bias"), - quantizationParameters); + biasQuantizationParameters); tensors[3] = CreateTensor(flatBufferBuilder, flatBufferBuilder.CreateVector(outputTensorShape.data(), outputTensorShape.size()), @@ -192,13 +206,16 @@ void ConvolutionTest(tflite::BuiltinOperator convolutionOperatorCode, std::vector& expectedOutputValues, const std::vector& biasShape = {}, const std::vector& biasValues = {}, - float filterScale = 1.0f, - int filterOffset = 0, + const std::vector biasScales = {1.0f}, + const std::vector biasOffsets = {0}, + const std::vector filterScales = {1.0f}, + const std::vector filterOffsets = {0}, float outputQuantScale = 2.0f, int outputQuantOffset = 0, float quantScale = 1.0f, int quantOffset = 0, - int32_t depth_multiplier = 1) + int32_t 
depth_multiplier = 1, + int32_t filterQuantizationDim = 3) { using namespace tflite; @@ -218,13 +235,16 @@ void ConvolutionTest(tflite::BuiltinOperator convolutionOperatorCode, outputShape, filterValues, biasValues, - filterScale, - filterOffset, + biasScales, + biasOffsets, + filterScales, + filterOffsets, outputQuantScale, outputQuantOffset, quantScale, quantOffset, - depth_multiplier); + depth_multiplier, + filterQuantizationDim); const Model* tfLiteModel = GetModel(modelBuffer.data()); diff --git a/delegate/src/test/DepthwiseConvolution2dTest.cpp b/delegate/src/test/DepthwiseConvolution2dTest.cpp index 6ca456982b..ca10f2c0cb 100644 --- a/delegate/src/test/DepthwiseConvolution2dTest.cpp +++ b/delegate/src/test/DepthwiseConvolution2dTest.cpp @@ -70,12 +70,14 @@ void DepthwiseConv2dValidReluFp32Test(std::vector& backends) expectedOutputValues, biasShape, biasValues, - 1.0f, // filterScale - 0, // filterOffset - 2.0f, // outputQuantScale - 0, // outputQuantOffset - 1.0f, // quantScale - 0, // quantOffset + {1.0f}, // biasScale + {0}, // biasOffset + {1.0f}, // filterScale + {0}, // filterOffsets + 2.0f, // outputQuantScale + 0, // outputQuantOffset + 1.0f, // quantScale + 0, // quantOffset depth_multiplier); } @@ -126,6 +128,100 @@ void DepthwiseConv2dSameUint8Test(std::vector& backends) biasValues); } +void DepthwiseConv2dSameInt8PerChannelTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 4, 4, 4 }; + std::vector filterShape { 1, 2, 2, 16 }; + std::vector biasShape {16} ; + std::vector outputShape { 1, 4, 4, 16 }; + + static std::vector inputValues = + { + 3,3,3,4, 4,4,0,0, 0,3,4,3, 0,2,2,3, + 3,0,3,0, 0,3,2,1, 4,1,2,2, 0,0,0,4, + 3,2,2,2, 2,1,0,4, 4,3,2,4, 3,2,0,0, + 4,1,4,4, 1,0,4,3, 3,2,0,3, 1,1,0,2 + }; + + std::vector filterValues = { 12,20,10, 3, 2,24, 9,10, 5,16,30,12, 3,10, 4,32, + 8, 0,30, 3, 0,16,12,15,20,12, 0, 3, 9,20, 8, 8, + 12,15,20, 0, 0, 0, 3,15,15, 8,40,12, 9, 5, 2,24, + 4, 0, 0, 6, 6, 0, 3, 5,20, 8,20, 3, 6,15, 4, 0 }; + std::vector filterScales = { 0.25, 0.2, 0.1, 0.3333333333, + 0.5, 0.125, 0.33333333, 0.2, + 0.2, 0.25, 0.1, 0.333333333, + 0.3333333333, 0.2, 0.5, 0.125 }; + + int32_t filterQuantizationDim = 3; + + int32_t depth_multiplier = 4; + + std::vector biasValues = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + float inputScale = 1.0f; + std::vector biasScales {}; + std::vector biasOffsets {}; + std::vector filterOffsets {}; + for (const auto& filterScale: filterScales) + { + biasScales.push_back(inputScale * filterScale); + // filter and bias offset always needs to be zero for per channel. 
We don't support anything else + biasOffsets.push_back(0); + filterOffsets.push_back(0); + } + + std::vector expectedOutputValues = + { + 26,21,21, 7,12,17,28,21,20,22,25,26, 6,11,10,16, + 16,16, 4,12, 7,18,28,27,30,20,12,14,16,19,17, 6, + 12,12, 8, 0, 3,13,18,15,18,26,20,26,26,32,28,21, + 0, 0, 0, 0, 2, 6, 6, 4, 2, 8, 6, 8,15,10,10,24, + 20,21, 9, 7, 3, 6,15,16,17,22,17,22,17,18,14, 7, + 18, 6,16,12,12,11,17,15,18,18,10,12,27,26,22,18, + 27,28,12,10, 7, 3, 8,13, 8,12,14,16,26,24,24,24, + 9, 9, 6, 0, 0, 0, 2, 6, 0, 0, 0, 0, 4, 8, 8,16, + 26,24,17, 7, 2, 8,11,10,30,24,30,28,32,33,30,24, + 20,11,16,12, 7, 9,17,13,20,14,16,18,31,36,33,29, + 28,25,19, 9, 6,13,20,19, 2, 8, 6, 8,17,17,15,25, + 12,15, 5, 3, 2, 6, 7, 7, 0, 0, 0, 0, 6, 2, 2, 6, + 14,16, 7, 5, 1, 3, 3, 2,20,28,12,20,13,20,20,19, + 9, 4,10, 4, 0, 4, 8, 6, 4,16,12,16,12,18,18,15, + 11,12, 6, 4, 2, 8,10, 7, 0, 0, 0, 0, 9,14,14,14, + 3, 4, 1, 1, 1, 3, 3, 2, 0, 0, 0, 0, 2, 4, 4, 8 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + ::tflite::TensorType_INT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues, + biasScales, + biasOffsets, + filterScales, + filterOffsets, + 1.0f, + 0, + inputScale, + 0, + depth_multiplier, + filterQuantizationDim); +} + TEST_SUITE("DepthwiseConv2d_CpuRef_Tests") { @@ -141,6 +237,12 @@ TEST_CASE ("DepthwiseConv2d_Same_Uint8_CpuRef_Test") DepthwiseConv2dSameUint8Test(backends); } +TEST_CASE ("DepthwiseConv2d_Same_Int8_PerChannelQuantization_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + DepthwiseConv2dSameInt8PerChannelTest(backends); +} + }//End of TEST_SUITE("DepthwiseConv2d_CpuRef_Tests") TEST_SUITE("DepthwiseConv2d_CpuAcc_Tests") diff --git a/include/armnnUtils/Permute.hpp b/include/armnnUtils/Permute.hpp index d719f4a623..1e4166d938 100644 --- a/include/armnnUtils/Permute.hpp +++ b/include/armnnUtils/Permute.hpp @@ -15,8 +15,7 @@ armnn::TensorShape Permuted(const armnn::TensorShape& srcShape, const armnn::PermutationVector& mappings); armnn::TensorInfo Permuted(const armnn::TensorInfo& info, - const armnn::PermutationVector& mappings, - bool perChannelPermute = false); + const armnn::PermutationVector& mappings); void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const void* src, void* dst, size_t dataTypeSize); diff --git a/src/armnn/test/UtilsTests.cpp b/src/armnn/test/UtilsTests.cpp index f0198cb9d4..a813feaf7f 100644 --- a/src/armnn/test/UtilsTests.cpp +++ b/src/armnn/test/UtilsTests.cpp @@ -249,22 +249,24 @@ BOOST_AUTO_TEST_CASE(CyclicalGraphTopologicalSortTest) BOOST_AUTO_TEST_CASE(PermuteQuantizationDim) { - std::vector scales; + std::vector scales {1.0f, 1.0f}; // Set QuantizationDim to be index 1 - const armnn::TensorInfo info({ 1, 2, 3, 4 }, armnn::DataType::Float32, scales, 1U); - BOOST_CHECK(info.GetQuantizationDim().value() == 1U); + const armnn::TensorInfo perChannelInfo({ 1, 2, 3, 4 }, armnn::DataType::Float32, scales, 1U); + BOOST_CHECK(perChannelInfo.GetQuantizationDim().value() == 1U); // Permute so that index 1 moves to final index i.e. 
index 3 armnn::PermutationVector mappings({ 0, 3, 2, 1 }); - auto permutedPerChannel = armnnUtils::Permuted(info, mappings, true); - auto permuted = armnnUtils::Permuted(info, mappings); + auto permutedPerChannel = armnnUtils::Permuted(perChannelInfo, mappings); // Check that QuantizationDim is in index 3 BOOST_CHECK(permutedPerChannel.GetQuantizationDim().value() == 3U); - // Check previous implementation unchanged - BOOST_CHECK(permuted.GetQuantizationDim().value() == 1U); + // Even if there is only a single scale the quantization dim still exists and needs to be permuted + std::vector scale {1.0f}; + const armnn::TensorInfo perChannelInfo1({ 1, 2, 3, 4 }, armnn::DataType::Float32, scale, 1U); + auto permuted = armnnUtils::Permuted(perChannelInfo1, mappings); + BOOST_CHECK(permuted.GetQuantizationDim().value() == 3U); } #if defined(ARMNNREF_ENABLED) diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp index a68839c20e..9b1fa9075c 100644 --- a/src/armnnTfLiteParser/TfLiteParser.cpp +++ b/src/armnnTfLiteParser/TfLiteParser.cpp @@ -359,7 +359,6 @@ void CalcPadding(uint32_t inputSize, armnn::TensorInfo ToTensorInfo(TfLiteParserImpl::TensorRawPtr tensorPtr, const std::vector& shapes, - const armnn::PermutationVector& dimensionMappings = {0, 1, 2, 3}, const bool outputTensor = false) { armnn::DataType type; @@ -472,8 +471,7 @@ armnn::TensorInfo ToTensorInfo(TfLiteParserImpl::TensorRawPtr tensorPtr, armnn::TensorInfo result(tensorShape, type, quantizationScales, - dimensionMappings[armnn::numeric_cast( - tensorPtr->quantization->quantized_dimension)]); + armnn::numeric_cast(tensorPtr->quantization->quantized_dimension)); return result; } } @@ -493,19 +491,17 @@ armnn::TensorInfo ToTensorInfo(TfLiteParserImpl::TensorRawPtr tensorPtr, } } -armnn::TensorInfo ToTensorInfo(TfLiteParserImpl::TensorRawPtr tensorPtr, - const armnn::PermutationVector& dimensionMappings = {0, 1, 2, 3}) +armnn::TensorInfo ToTensorInfo(TfLiteParserImpl::TensorRawPtr tensorPtr) { auto const & dimensions = AsUnsignedVector(tensorPtr->shape); - return ToTensorInfo(tensorPtr, dimensions, dimensionMappings); + return ToTensorInfo(tensorPtr, dimensions); } armnn::TensorInfo ToTensorInfo(TfLiteParserImpl::TensorRawPtr tensorPtr, const bool outputTensor) { auto const & dimensions = AsUnsignedVector(tensorPtr->shape); - const armnn::PermutationVector& dimensionMappings = {0, 1, 2, 3}; - return ToTensorInfo(tensorPtr, dimensions, dimensionMappings, outputTensor); + return ToTensorInfo(tensorPtr, dimensions, outputTensor); } template @@ -1013,7 +1009,7 @@ void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operato PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W] armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); - armnn::TensorInfo filterTensorInfo = ToTensorInfo(inputs[1], permutationVector); + armnn::TensorInfo filterTensorInfo = ToTensorInfo(inputs[1]); // Assuming input is NHWC unsigned int inputHeight = inputTensorInfo.GetShape()[1]; diff --git a/src/armnnTfLiteParser/test/DepthwiseConvolution2D.cpp b/src/armnnTfLiteParser/test/DepthwiseConvolution2D.cpp index 7380d884fd..95ad2d5ee9 100644 --- a/src/armnnTfLiteParser/test/DepthwiseConvolution2D.cpp +++ b/src/armnnTfLiteParser/test/DepthwiseConvolution2D.cpp @@ -225,19 +225,19 @@ BOOST_FIXTURE_TEST_CASE(ParseDynamicDepthwiseConv2DSameBias, DynamicDepthwiseCon struct DepthwiseConvolution2dFixture2 : public ParserFlatbuffersFixture { explicit DepthwiseConvolution2dFixture2(const 
std::string& inputShape, - const std::string& outputShape, - const std::string& filterShape, - const std::string& filterData, - const std::string& strides, - const std::string& paddingType, - const std::string biasShape = "", - const std::string biasData = "", - const std::string filter_quant_min = "[ 0.0 ]", - const std::string filter_quant_max = "[ 255.0 ]", - const std::string filter_quant_scale = "[ 1.0 ]", - const std::string filter_quant_zero_point = "[ 0 ]", - const std::string filter_quant_axis = "" - ) + const std::string& outputShape, + const std::string& filterShape, + const std::string& filterData, + const std::string& strides, + const std::string& paddingType, + const std::string biasShape = "", + const std::string biasData = "", + const std::string filter_quant_min = "[ 0.0 ]", + const std::string filter_quant_max = "[ 255.0 ]", + const std::string filter_quant_scale = "[ 1.0 ]", + const std::string filter_quant_zero_point = "[ 0 ]", + const std::string filter_quant_axis = "", + const std::string output_scale = "[ 1.0 ]") { std::string inputTensors = "[ 0, 2 ]"; std::string biasTensor = ""; @@ -301,7 +301,7 @@ struct DepthwiseConvolution2dFixture2 : public ParserFlatbuffersFixture "quantization": { "min": [ 0.0 ], "max": [ 511.0 ], - "scale": [ 1.0 ], + "scale": )" + output_scale + R"(, "zero_point": [ 0 ], } }, @@ -381,12 +381,12 @@ struct DepthwiseConvolution2dNoChannelQuantFixture : DepthwiseConvolution2dFixtu : DepthwiseConvolution2dFixture2("[ 1, 3, 3, 3 ]", // inputShape "[ 1, 3, 3, 3 ]", // outputShape "[ 1, 3, 3, 3 ]", // filterShape - "[ 9,8,7, 6,5,4, 3,2,1, 9,8,7, 6,5,4, 3,2,1, 9,8,7, 6,5,4, 3,2,1 ]", // filterData + "[ 9,8,7, 6,5,4, 3,2,1, 9,8,7, 6,5,4, 3,2,1, 9,8,7, 6,5,4, 3,2,1 ]", //filterData "1", // stride w and h "SAME", // padding type "", // bias shape "", // bias data - "[ 0.0 ]", // filter quantization min values + "[ 0.0 ]", // filter quantization min values "[ 255.0 ]", // filter quantization max values "[ 1.0, 1.0, 1.0]", // filter quantization scales "[ 0, 0, 0]", // filter quantization zero-points @@ -582,4 +582,396 @@ BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant4, 9, 8, 7, 6, 5, 4, 3, 2, 1, 9, 8, 7, 6, 5, 4, 3}); } + +struct DepthwiseConvolution2dWeightsPerChannelQuant6Fixture : DepthwiseConvolution2dFixture2 +{ + DepthwiseConvolution2dWeightsPerChannelQuant6Fixture() + : DepthwiseConvolution2dFixture2("[ 1, 4, 4, 4 ]", // inputShape + "[ 1, 4, 4, 16 ]", // outputShape + "[ 1, 2, 2, 16 ]", // filterShape + // filter data is [ 3,4,1,1,1,3,3,2,1,4,3,4,1,2,2,4, + // 2,0,3,1,0,2,4,3,4,3,0,1,3,4,4,1, + // 3,3,2,0,0,0,1,3,3,2,4,4,3,1,1,3, + // 1,0,0,2,3,0,1,1,4,2,2,1,2,3,2,0] + // quantized per channel with q_dim=3 + "[12,20,10, 3, 4,15,30, 6, 4,20,30,12, 4,10,20,12," + " 8, 0,30, 3, 0,10,40, 9,16,15, 0, 3,12,20,40, 3," + " 12,15,20, 0, 0, 0,10, 9,12,10,40,12,12, 5,10, 9," + " 4, 0, 0, 6,12, 0,10, 3,16,10,20, 3, 8,15,20, 0]", + "1", // stride w and h + "SAME", // padding type + "", // bias shape + "", // bias data + "[ 0.0 ]", // filter quantization min values + "[ 255.0 ]", // filter quantization max values + "[ 0.25, 0.2, 0.1, 0.333333333," + "0.25, 0.2, 0.1, 0.333333333," + "0.25, 0.2, 0.1, 0.333333333," + "0.25, 0.2, 0.1, 0.333333333]", // filter quantization scales + "[ 0, 0, 0, 0]", // filter quantization zero-points + "3" // filter quantized axis + // (in case of per channel quantization) + ) + {} +}; + + +BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant6, + 
DepthwiseConvolution2dWeightsPerChannelQuant6Fixture) +{ + RunTest<4, armnn::DataType::QAsymmS8>( + 0, + { 1,0,1,2,0,4,4,0,2,1,2,0,1,3,3,0, + 1,2,2,3,3,4,1,1,2,4,1,3,4,2,0,2, + 0,3,1,3,4,3,2,0,1,2,3,3,0,2,4,2, + 1,2,1,4,3,4,1,3,1,0,2,3,1,3,2,0}, + { 9, 7, 3, 7,12, 8,22,22,27,22,13,17,13,10, 9,17, + 15, 9,12, 6,16,14,24,27,19,26,18,23, 9,10, 7, 3, + 18,14, 9,11, 7, 9,21,25,17,19,10,15,13, 9, 7, 9, + 15,16, 9, 1, 3, 9,11,12, 3,12, 9,12, 6, 2, 2, 6, + 13, 4,10,12,11,14,28,28,17,17,14,15,15,13,13,22, + 26,24,17, 7,10,20,33,31,23,17,17,16,16,23,20, 7, + 17,11,16, 6,10,16,24,22,26,18,23,20,22,23,21,23, + 12,16, 4, 4, 2, 6, 8,10,12, 8,16,16, 8, 6, 6,14, + 14, 3,14,10,15,15,27,25,16,14, 9,11,21,19,16,24, + 24,25,13, 7, 3,13,21,24,25,23,14,17,24,24,21,12, + 7, 7, 3, 3,11,10,17,13,33,32,21,26,18,17,17,23, + 3, 3, 2, 0, 2, 6, 9,13,10,20,20,24, 2, 4, 4, 8, + 9, 4,10, 4, 2,14,22,16, 5, 7, 3, 5,13,20,20,19, + 11,12, 6, 4, 4,12,12, 8, 9,10, 3, 6,12,18,18,15, + 5, 4, 4, 2, 0, 6,12, 9,10,14, 6,10, 3, 6, 6,12, + 3, 4, 1, 1, 3, 9, 9, 6, 2, 8, 6, 8, 0, 0, 0, 0}); +} + + +struct DepthwiseConvolution2dWeightsPerChannelQuant1_1Fixture : DepthwiseConvolution2dFixture2 +{ + DepthwiseConvolution2dWeightsPerChannelQuant1_1Fixture() + : DepthwiseConvolution2dFixture2("[ 1, 3, 3, 3 ]", // inputShape + "[ 1, 3, 3, 3 ]", // outputShape + "[ 1, 3, 3, 3 ]", // filterShape + // filterData is [ 1,4,0,2,4,3,1,0,1, + // 3,0,4,0,1,3,4,2,4, + // 3,0,3,4,4,0,3,4,2] + // quantized per channel with q_dim=3 + "[ 4,20, 0, 8,20,30, 4, 0,10,12," + " 0,40, 0, 5,30,16,10,40,12, 0," + "30,16,20, 0,12,20,20]", + "1", // stride w and h + "SAME", // padding type + "", // bias shape + "", // bias data + "[ 0.0 ]", // filter quantization min values + "[ 255.0 ]", // filter quantization max values + "[ 0.25, 0.2, 0.1]", // filter quantization scales + "[ 0, 0, 0]", // filter quantization zero-points + "3" // filter quantized axis + // (in case of per channel quantization) + ) + {} +}; + + +BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant1_1, + DepthwiseConvolution2dWeightsPerChannelQuant1_1Fixture) +{ + RunTest<4, armnn::DataType::QAsymmS8>( + 0, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + { 11,11, 9,17,11,16,10, 5,10, + 14,15,13,21,19,20,13,13,13, + 7, 7,11,11,11,15, 6, 9,10}); +} + +// Same with input different to 1 +struct DepthwiseConvolution2dWeightsPerChannelQuant1_2Fixture : DepthwiseConvolution2dFixture2 +{ + DepthwiseConvolution2dWeightsPerChannelQuant1_2Fixture() + : DepthwiseConvolution2dFixture2("[ 1, 3, 3, 3 ]", // inputShape + "[ 1, 3, 3, 3 ]", // outputShape + "[ 1, 3, 3, 3 ]", // filterShape + // filterData is [ 1,4,0,2,4,3,1,0,1, + // 3,0,4,0,1,3,4,2,4, + // 3,0,3,4,4,0,3,4,2] + // quantized per channel with q_dim=3 + "[ 4,20, 0, 8,20,30, 4, 0,10,12," + " 0,40, 0, 5,30,16,10,40,12, 0," + "30,16,20, 0,12,20,20]", + "1", // stride w and h + "SAME", // padding type + "", // bias shape + "", // bias data + "[ 0.0 ]", // filter quantization min values + "[ 255.0 ]", // filter quantization max values + "[ 0.25, 0.2, 0.1]", // filter quantization scales + "[ 0, 0, 0]", // filter quantization zero-points + "3" // filter quantized axis + // (in case of per channel quantization) + ) + {} +}; + + +BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant1_2, + DepthwiseConvolution2dWeightsPerChannelQuant1_2Fixture) +{ + RunTest<4, armnn::DataType::QAsymmS8>( + 0, + { 3,2,0,0,4,3,0,1,2, + 0,1,3,0,4,2,2,2,3, + 2,4,3,2,0,4,3,4,0}, + { 0,30,16,15,30,32, 8, 
9,24, + 20,33,28,34,48,50,18,38,35, + 8, 8,36,20,28,33,10,28,25}); +} + + +struct DepthwiseConvolution2dWeightsPerChannelQuant4_1Fixture : DepthwiseConvolution2dFixture2 +{ + DepthwiseConvolution2dWeightsPerChannelQuant4_1Fixture() + : DepthwiseConvolution2dFixture2("[ 1, 4, 4, 4 ]", // inputShape + "[ 1, 4, 4, 16 ]", // outputShape + "[ 1, 2, 2, 16 ]", // filterShape + // filter data is [ 3,4,1,1,1,3,3,2,1,4,3,4,1,2,2,4, + // 2,0,3,1,0,2,4,3,4,3,0,1,3,4,4,1, + // 3,3,2,0,0,0,1,3,3,2,4,4,3,1,1,3, + // 1,0,0,2,3,0,1,1,4,2,2,1,2,3,2,0 ] + // quantized per channel with q_dim=3 + "[12,20,10, 3, 4,15,30, 6, 4,20,30,13, 4,10,20,13," + " 8, 0,30, 3, 0,10,40,10,16,15, 0, 3,12,20,40, 3," + " 12,15,20, 0, 0, 0,10,10,12,10,40,13,12, 5,10,10," + " 4, 0, 0, 6,12, 0,10, 3,16,10,20, 3, 8,15,20, 0]", + "1", // stride w and h + "SAME", // padding type + "", // bias shape + "", // bias data + "[ 0.0 ]", // filter quantization min values + "[ 255.0 ]", // filter quantization max values + "[ 0.25, 0.2, 0.1, 0.3," + "0.25, 0.2, 0.1, 0.3," + "0.25, 0.2, 0.1, 0.3," + "0.25, 0.2, 0.1, 0.3]", // filter quantization scales + "[ 0, 0, 0, 0]", // filter quantization zero-points + "3" // filter quantized axis + // (in case of per channel quantization) + ) + {} +}; + + +BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant4_1, + DepthwiseConvolution2dWeightsPerChannelQuant4_1Fixture) +{ + RunTest<4, armnn::DataType::QAsymmS8>( + 0, + { 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, + 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, + 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, + 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}, + { 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 6, 7, 3, 1, 1, 3, 4, 5, 4, 6, 7, 8, 4, 3, 3, 7, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 6, 7, 3, 1, 1, 3, 4, 5, 4, 6, 7, 8, 4, 3, 3, 7, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 9, 7, 6, 4, 4, 5, 9, 9,12,11, 9,10, 9,10, 9, 8, + 6, 7, 3, 1, 1, 3, 4, 5, 4, 6, 7, 8, 4, 3, 3, 7, + 5, 4, 4, 2, 1, 5, 7, 5, 5, 7, 3, 5, 4, 6, 6, 5, + 5, 4, 4, 2, 1, 5, 7, 5, 5, 7, 3, 5, 4, 6, 6, 5, + 5, 4, 4, 2, 1, 5, 7, 5, 5, 7, 3, 5, 4, 6, 6, 5, + 3, 4, 1, 1, 1, 3, 3, 2, 1, 4, 3, 4, 1, 2, 2, 4}); +} + + + +struct DepthwiseConvolution2dWeightsPerChannelQuant4_2Fixture : DepthwiseConvolution2dFixture2 +{ + DepthwiseConvolution2dWeightsPerChannelQuant4_2Fixture() + : DepthwiseConvolution2dFixture2("[ 1, 4, 4, 4 ]", // inputShape + "[ 1, 4, 4, 16 ]", // outputShape + "[ 1, 2, 2, 16 ]", // filterShape + // filter data is [ 3,4,1,1,1,3,3,2,1,4,3,4,1,2,2,4, + // 2,0,3,1,0,2,4,3,4,3,0,1,3,4,4,1, + // 3,3,2,0,0,0,1,3,3,2,4,4,3,1,1,3, + // 1,0,0,2,3,0,1,1,4,2,2,1,2,3,2,0 ] + // quantized per channel with q_dim=3 + "[12,20,10, 3, 4,15,30, 6, 4,20,30,13, 4,10,20,13," + " 8, 0,30, 3, 0,10,40,10,16,15, 0, 3,12,20,40, 3," + " 12,15,20, 0, 0, 0,10,10,12,10,40,13,12, 5,10,10," + " 4, 0, 0, 6,12, 0,10, 3,16,10,20, 3, 8,15,20, 0]", + "1", // stride w and h + "SAME", // padding type + "", // bias shape + "", // bias data + "[ 0.0 ]", // filter quantization min values + "[ 255.0 ]", // filter quantization max values + "[ 0.25, 0.2, 0.1, 0.3," + "0.25, 0.2, 0.1, 0.3," + "0.25, 0.2, 0.1, 0.3," + "0.25, 0.2, 0.1, 0.3]", // filter quantization scales + "[ 0, 0, 0, 0]", // filter quantization zero-points + "3" // filter quantized axis + // (in case 
of per channel quantization) + ) + {} +}; + + +BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant4_2, + DepthwiseConvolution2dWeightsPerChannelQuant4_2Fixture) +{ + RunTest<4, armnn::DataType::QAsymmS8>( + 0, + { 3,3,3,4, 4,4,0,0, 0,3,4,3, 0,2,2,3, + 3,0,3,0, 0,3,2,1, 4,1,2,2, 0,0,0,4, + 3,2,2,2, 2,1,0,4, 4,3,2,4, 3,2,0,0, + 4,1,4,4, 1,0,4,3, 3,2,0,3, 1,1,0,2}, + { 26,21,21, 7,12,17,28,21,20,22,25,26, 6,11,10,16, + 16,16, 4,12, 7,18,28,27,30,20,12,14,16,19,17, 6, + 12,12, 8, 0, 3,13,18,15,18,26,20,26,26,32,28,21, + 0, 0, 0, 0, 2, 6, 6, 4, 2, 8, 6, 8,15,10,10,24, + 20,21, 9, 7, 3, 6,15,16,17,22,17,22,17,18,14, 7, + 18, 6,16,12,12,11,17,15,18,18,10,12,27,26,22,18, + 27,28,12,10, 7, 3, 8,13, 8,12,14,16,26,24,24,24, + 9, 9, 6, 0, 0, 0, 2, 6, 0, 0, 0, 0, 4, 8, 8,16, + 26,24,17, 7, 2, 8,11,10,30,24,30,28,32,33,30,24, + 20,11,16,12, 7, 9,17,13,20,14,16,18,31,36,33,29, + 28,25,19, 9, 6,13,20,19, 2, 8, 6, 8,17,17,15,25, + 12,15, 5, 3, 2, 6, 7, 7, 0, 0, 0, 0, 6, 2, 2, 6, + 14,16, 7, 5, 1, 3, 3, 2,20,28,12,20,13,20,20,19, + 9, 4,10, 4, 0, 4, 8, 6, 4,16,12,16,12,18,18,15, + 11,12, 6, 4, 2, 8,10, 7, 0, 0, 0, 0, 9,14,14,14, + 3, 4, 1, 1, 1, 3, 3, 2, 0, 0, 0, 0, 2, 4, 4, 8}); +} + + +struct DepthwiseConvolution2dWeightsPerChannelQuant4_5Fixture : DepthwiseConvolution2dFixture2 +{ + DepthwiseConvolution2dWeightsPerChannelQuant4_5Fixture() + : DepthwiseConvolution2dFixture2("[ 1, 4, 4, 4 ]", // inputShape + "[ 1, 4, 4, 16 ]", // outputShape + "[ 1, 2, 2, 16 ]", // filterShape + // filter data is [ 1, 4, 9, 16, 25, 36, + // 49, 64, 81, 100, 121, 144, + // 169, 196, 225, 256, 17, 36, + // 57, 80, 105, 132, 161, 192, + // 225, 260, 297, 336, 377, 420, + // 465, 512, 33, 68, 105, 144, + // 185, 228, 273, 320, 369, 420, + // 473, 528, 585, 644, 705, 768, + // 49, 100, 153, 208, 265, 324, + // 385, 448, 513, 580, 649, 720, + // 793, 868, 945,1024 ] + // quantized per channel with q_dim=3 + "[ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16," + " 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32," + " 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48," + "49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64]", + "1", // stride w and h + "SAME", // padding type + "", // bias shape + "", // bias data + "[ 0.0 ]", // filter quantization min values + "[ 255.0 ]", // filter quantization max values + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12,13,14,15,16]", // filter quantization scales + "[ 0, 0, 0, 0]", // filter quantization zero-points + "3", // filter quantized axis + // (in case of per channel quantization) + "[ 100.0 ]" // output scale + ) + {} +}; + +// Test for depthwise_multiplier different to one (M > 1) +BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant4_5, + DepthwiseConvolution2dWeightsPerChannelQuant4_5Fixture) +{ + RunTest<4, armnn::DataType::QAsymmS8>( + 0, + { 1,1,1,2,2,2,1,2,1,2,2,1,2,2,1,1,1,1,1,1,1,2,2,2, + 1,2,2,2,1,1,1,2,1,1,1,1,2,1,2,1,2,1,1,2,1,2,1,1, + 1,2,2,1,2,2,1,1,2,1,2,1,1,2,1,2}, + { 1, 2, 3, 5, 9,11,14,16,17,19,21,24,32,36,39,43, + 1, 2, 3, 4,11,14,17,20,22,26,29,33,34,38,42,46, + 1, 2, 3, 5, 8,11,13,16,16,18,21,24,33,36,39,43, + 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6,13,14,16,17, + 1, 3, 4, 6, 6, 8,10,12,19,22,24,27,23,25,28,30, + 1, 3, 5, 8, 7, 8,10,12,18,21,24,27,32,36,39,43, + 1, 2, 4, 5, 8,10,13,15,12,14,16,18,30,33,37,40, + 0, 0, 1, 1, 3, 4, 5, 7, 4, 5, 5, 6, 9,10,11,12, + 1, 3, 5, 7,10,12,15,17,17,20,23,25,19,21,23,25, + 2, 4, 6, 8, 7, 9,11,13,17,20,23,25,23,25,28,30, + 1, 2, 4, 6, 9,11,14,16,15,17,20,22,28,31,35,38, + 0, 0, 1, 1, 4, 5, 6, 7, 4, 5, 5, 
6,13,14,16,17, + 0, 0, 1, 1, 2, 3, 4, 5, 3, 4, 5, 6, 5, 6, 6, 7, + 0, 0, 1, 1, 1, 2, 2, 3, 5, 6, 7, 8, 5, 6, 6, 7, + 0, 0, 0, 1, 2, 3, 3, 4, 3, 4, 5, 6, 9,10,11,12, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 3, 3, 4, 5}); +} + + +struct DepthwiseConvolution2dWeightsPerChannelQuant4_3_1Fixture : DepthwiseConvolution2dFixture2 +{ + DepthwiseConvolution2dWeightsPerChannelQuant4_3_1Fixture() + : DepthwiseConvolution2dFixture2("[ 1, 4, 4, 4 ]", // inputShape + "[ 1, 4, 4, 16 ]", // outputShape + "[ 1, 2, 2, 16 ]", // filterShape + // filter data is [ 3,4,1,1,1,3,3,2,1,4,3,4,1,2,2,4, + // 2,0,3,1,0,2,4,3,4,3,0,1,3,4,4,1, + // 3,3,2,0,0,0,1,3,3,2,4,4,3,1,1,3, + // 1,0,0,2,3,0,1,1,4,2,2,1,2,3,2,0 ] + // quantized per channel with q_dim=3 + "[12,20,10, 3, 2,24, 9,10, 5,16,30,12, 3,10, 4,32," + " 8, 0,30, 3, 0,16,12,15,20,12, 0, 3, 9,20, 8, 8," + " 12,15,20, 0, 0, 0, 3,15,15, 8,40,12, 9, 5, 2,24," + " 4, 0, 0, 6, 6, 0, 3, 5,20, 8,20, 3, 6,15, 4, 0]", + "1", // stride w and h + "SAME", // padding type + "", // bias shape + "", // bias data + "[ 0.0 ]", // filter quantization min values + "[ 255.0 ]", // filter quantization max values + "[0.25, 0.2, 0.1, 0.3333333333, " + "0.5, 0.125, 0.33333333, 0.2, " + "0.2, 0.25, 0.1, 0.333333333, " + "0.3333333333, 0.2, 0.5, 0.125]", // filter quantization scales + "[ 0, 0, 0, 0]", // filter quantization zero-points + "3" // filter quantized axis + // (in case of per channel quantization) + ) + {} +}; + +// Test for depthwise_multiplier different to one (M > 1) +BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DFilterWeightsPerChannelQuant4_3_1, + DepthwiseConvolution2dWeightsPerChannelQuant4_3_1Fixture) +{ + RunTest<4, armnn::DataType::QAsymmS8>( + 0, + { 3,3,3,4, 4,4,0,0, 0,3,4,3, 0,2,2,3, + 3,0,3,0, 0,3,2,1, 4,1,2,2, 0,0,0,4, + 3,2,2,2, 2,1,0,4, 4,3,2,4, 3,2,0,0, + 4,1,4,4, 1,0,4,3, 3,2,0,3, 1,1,0,2}, + { 26,21,21, 7,12,17,28,21,20,22,25,26, 6,11,10,16, + 16,16, 4,12, 7,18,28,27,30,20,12,14,16,19,17, 6, + 12,12, 8, 0, 3,13,18,15,18,26,20,26,26,32,28,21, + 0, 0, 0, 0, 2, 6, 6, 4, 2, 8, 6, 8,15,10,10,24, + 20,21, 9, 7, 3, 6,15,16,17,22,17,22,17,18,14, 7, + 18, 6,16,12,12,11,17,15,18,18,10,12,27,26,22,18, + 27,28,12,10, 7, 3, 8,13, 8,12,14,16,26,24,24,24, + 9, 9, 6, 0, 0, 0, 2, 6, 0, 0, 0, 0, 4, 8, 8,16, + 26,24,17, 7, 2, 8,11,10,30,24,30,28,32,33,30,24, + 20,11,16,12, 7, 9,17,13,20,14,16,18,31,36,33,29, + 28,25,19, 9, 6,13,20,19, 2, 8, 6, 8,17,17,15,25, + 12,15, 5, 3, 2, 6, 7, 7, 0, 0, 0, 0, 6, 2, 2, 6, + 14,16, 7, 5, 1, 3, 3, 2,20,28,12,20,13,20,20,19, + 9, 4,10, 4, 0, 4, 8, 6, 4,16,12,16,12,18,18,15, + 11,12, 6, 4, 2, 8,10, 7, 0, 0, 0, 0, 9,14,14,14, + 3, 4, 1, 1, 1, 3, 3, 2, 0, 0, 0, 0, 2, 4, 4, 8}); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnnUtils/Permute.cpp b/src/armnnUtils/Permute.cpp index 377046367c..7d15f3ca5d 100644 --- a/src/armnnUtils/Permute.cpp +++ b/src/armnnUtils/Permute.cpp @@ -113,14 +113,14 @@ armnn::TensorShape Permuted(const armnn::TensorShape& srcShape, } armnn::TensorInfo Permuted(const armnn::TensorInfo& info, - const armnn::PermutationVector& mappings, - bool perChannelPermute) + const armnn::PermutationVector& mappings) { armnn::TensorInfo outInfo(info); outInfo.SetShape(Permuted(info.GetShape(), mappings)); - // If TensorInfo has Per-Axis Quantization then permute QuantizationDim to mapping - if (info.HasPerAxisQuantization() && perChannelPermute) + // If TensorInfo has Per-Axis Quantization then it also has a QuantizationDim which needs to + // be permuted according to the mapping + if (info.GetQuantizationDim().has_value()) { 
outInfo.SetQuantizationDim(mappings[info.GetQuantizationDim().value()]); } -- cgit v1.2.1
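
For context, a minimal sketch of the behavioural change this patch relies on — not code from the change set itself; the standalone main(), the include paths and the example shapes/data type are assumptions chosen for illustration. It shows how armnnUtils::Permuted now carries the per-axis quantization dimension through the permutation together with the shape, which is why the permutation-vector parameters could be dropped from ToTensorInfo and GetTensorInfoForTfLiteTensor.

// Minimal sketch, assuming the public ArmNN headers below are available.
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <armnnUtils/Permute.hpp>

#include <iostream>
#include <vector>

int main()
{
    // Per-axis quantized filter info in the TfLite depthwise layout,
    // with the quantization dimension on the last axis (index 3).
    std::vector<float> scales(16, 0.25f);
    armnn::TensorInfo filterInfo({ 1, 2, 2, 16 }, armnn::DataType::QAsymmS8, scales, 3U);

    // The same mapping the delegate and the parser use:
    // [H, W, I, M] -> [M, I, H, W]
    armnn::PermutationVector mappings{ 2, 3, 1, 0 };

    // Permuted() now updates the shape and the quantization dimension in one
    // call, so callers no longer need to thread a permutation vector through
    // the tensor-info helpers.
    armnn::TensorInfo permuted = armnnUtils::Permuted(filterInfo, mappings);

    // mappings[3] == 0, so the quantization dimension moves from 3 to 0.
    std::cout << "permuted quantization dim: "
              << permuted.GetQuantizationDim().value() << std::endl;
    return 0;
}

Because a PermutationVector entry mappings[i] names the destination index of source dimension i, the shape [1, 2, 2, 16] becomes [16, 2, 1, 2] and the quantization dimension 3 lands on index 0, matching the [M, I, H, W] weight layout ArmNN expects.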