authorJan Eilers <jan.eilers@arm.com>2021-06-02 12:01:25 +0100
committerJan Eilers <jan.eilers@arm.com>2021-06-16 11:31:42 +0000
commit53ef79504b4c881c572735393c2eede5fa556c46 (patch)
treef6e0cd27c4d03075fa154074c5b12d7c8c3149f7 /src/armnn
parent77fe76bfa8cb798943821d1f3e432c228e1cdee3 (diff)
IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]
* This change is necessary because tflite uses a [1,H,W,I*M] format
  and uses the I*M dimension for per-axis quantization. Our previous
  layout [M,I,H,W] can't handle the corresponding quantization scales.
* Updates Onnx-, TfLiteParser and TfliteDelegate
* Updates the CpuRef, CpuAcc and GpuAcc backends
* Adjusts unit tests
* Adds test to ensure models with old layout can still be read and executed
* Adds conversion function from the new layout [1,H,W,I*M] back to the
  previous layout [M,I,H,W], which can be used by backend developers

!android-nn-driver:5553

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
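For illustration, a minimal sketch of what such a layout conversion could look like. This is a hypothetical helper, not the actual ArmNN function added by the patch; it assumes the tflite convention that entry i*M + m of the I*M axis holds multiplier channel m of input channel i:

#include <cstddef>
#include <vector>

// Hypothetical helper: converts depthwise weights from the new tflite-style
// layout [1, H, W, I*M] back to the legacy [M, I, H, W] layout.
template <typename T>
std::vector<T> ConvertToLegacyDepthwiseLayout(const std::vector<T>& src,
                                              size_t H, size_t W, size_t I, size_t M)
{
    std::vector<T> dst(src.size());
    for (size_t m = 0; m < M; ++m)
    {
        for (size_t i = 0; i < I; ++i)
        {
            for (size_t h = 0; h < H; ++h)
            {
                for (size_t w = 0; w < W; ++w)
                {
                    // Row-major offsets in each layout; cOut = i * M + m (assumed
                    // tflite convention for the I*M axis).
                    const size_t srcIdx = (h * W + w) * (I * M) + (i * M + m); // [1,H,W,I*M]
                    const size_t dstIdx = ((m * I + i) * H + h) * W + w;       // [M,I,H,W]
                    dst[dstIdx] = src[srcIdx];
                }
            }
        }
    }
    return dst;
}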
Diffstat (limited to 'src/armnn')
-rw-r--r--src/armnn/layers/DepthwiseConvolution2dLayer.cpp13
-rw-r--r--src/armnn/optimizations/FuseBatchNorm.hpp25
-rw-r--r--src/armnn/test/CreateWorkload.hpp4
-rw-r--r--src/armnn/test/InferOutputTests.hpp2
-rw-r--r--src/armnn/test/OptimizerTests.cpp4
-rw-r--r--src/armnn/test/optimizations/FoldPadTests.cpp2
-rw-r--r--src/armnn/test/optimizations/FuseActivationTests.cpp6
-rw-r--r--src/armnn/test/optimizations/FuseBatchNormTests.cpp12
8 files changed, 28 insertions, 40 deletions
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
index b96c567504..ed52b39050 100644
--- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
+++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
@@ -98,24 +98,21 @@ DepthwiseConvolution2dLayer::InferOutputShapes(const std::vector<TensorShape>& i
unsigned int inputBatchSize = inputShape[0];
unsigned int inputHeight = inputShape[dataLayoutIndex.GetHeightIndex()];
unsigned int inputWidth = inputShape[dataLayoutIndex.GetWidthIndex()];
- unsigned int inputChannels = inputShape[dataLayoutIndex.GetChannelsIndex()];
- // Expected filter shape: [ M, I, H, W ] - This shape does NOT depend on the data layout
- // Namely: [ depth multiplier, input channels, filter height, filter width ]
- // Output channels = input channels * depthMultiplier
- unsigned int depthMultiplier = filterShape[0];
+ // Expected filter shape: [ 1, H, W, O ] - This shape does NOT depend on the data layout
+ // Namely: [ 1, filter height, filter width, output channels ]
- unsigned int filterHeight = filterShape[2];
+ unsigned int filterHeight = filterShape[1];
unsigned int dilatedFilterHeight = filterHeight + (m_Param.m_DilationY - 1) * (filterHeight - 1);
unsigned int readHeight = (inputHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - dilatedFilterHeight;
unsigned int outputHeight = 1 + (readHeight / m_Param.m_StrideY);
- unsigned int filterWidth = filterShape[3];
+ unsigned int filterWidth = filterShape[2];
unsigned int dilatedFilterWidth = filterWidth + (m_Param.m_DilationX - 1) * (filterWidth - 1);
unsigned int readWidth = (inputWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - dilatedFilterWidth;
unsigned int outputWidth = 1 + (readWidth / m_Param.m_StrideX);
- unsigned int outputChannels = inputChannels * depthMultiplier;
+ unsigned int outputChannels = filterShape[3];
unsigned int outputBatchSize = inputBatchSize;
TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ?
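A standalone restatement of the shape arithmetic in the hunk above may help when reviewing; this sketch is illustrative only (names and values are not from the patch):

// Mirrors the height computation above; width is analogous.
unsigned int InferOutputHeight(unsigned int inputHeight, unsigned int filterHeight,
                               unsigned int padTop, unsigned int padBottom,
                               unsigned int strideY, unsigned int dilationY)
{
    const unsigned int dilatedFilterHeight = filterHeight + (dilationY - 1) * (filterHeight - 1);
    const unsigned int readHeight = (inputHeight + padTop + padBottom) - dilatedFilterHeight;
    return 1 + (readHeight / strideY);
}

// Example: input NHWC [1, 5, 5, 2], filter [1, 3, 3, 4] (I = 2, M = 2), stride 1,
// no padding, no dilation: InferOutputHeight(5, 3, 0, 0, 1, 1) == 3, and
// outputChannels is now read straight from filterShape[3], giving [1, 3, 3, 4].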
diff --git a/src/armnn/optimizations/FuseBatchNorm.hpp b/src/armnn/optimizations/FuseBatchNorm.hpp
index 3fb4b34d28..fe8238bf14 100644
--- a/src/armnn/optimizations/FuseBatchNorm.hpp
+++ b/src/armnn/optimizations/FuseBatchNorm.hpp
@@ -56,13 +56,12 @@ public:
armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout);
auto weightsShape = weightsInfo.GetShape();
- const unsigned int depthMultiplier = depthwise ? weightsShape[0] : 1;
- const unsigned int inputChannels = depthwise ? weightsShape[1] :
- weightsShape[dataLayout.GetChannelsIndex()];
- const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : weightsShape[0];
- const unsigned int weightsHeight = depthwise ? weightsShape[2] :
+ const unsigned int inputChannels = parentOut->GetTensorInfo().GetShape()[dataLayout.GetChannelsIndex()];
+ const unsigned int depthMultiplier = depthwise ? weightsShape[3] / inputChannels : 1;
+ const unsigned int outputChannels = depthwise ? weightsShape[3] : weightsShape[0];
+ const unsigned int weightsHeight = depthwise ? weightsShape[1] :
weightsShape[dataLayout.GetHeightIndex()];
- const unsigned int weightsWidth = depthwise ? weightsShape[3] :
+ const unsigned int weightsWidth = depthwise ? weightsShape[2] :
weightsShape[dataLayout.GetWidthIndex()];
const auto* weightsBuffer = static_cast<const T*>(weightsTensor.GetMemoryArea());
@@ -79,7 +78,6 @@ public:
// fusedWeights = ( gamma * weights ) / ( std - epsilon);
std::vector<T> fusedWeightsVector(weightsVector.size());
- unsigned int depthwiseMultiplierIdx = 0;
for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
{
@@ -87,12 +85,6 @@ public:
{
T mult = gammaVector[cOut] / static_cast<T>(sqrtf (varianceVector[cOut] + epsilon));
- if (depthwise)
- {
- cInput = cOut / depthMultiplier;
- depthwiseMultiplierIdx = cOut % depthMultiplier;
- }
-
for (unsigned int h = 0; h < weightsHeight; ++h)
{
for (unsigned int w = 0; w < weightsWidth; ++w)
@@ -101,10 +93,9 @@ public:
if (depthwise)
{
- weightsIdx = depthwiseMultiplierIdx * weightsWidth * weightsHeight * inputChannels +
- cInput * weightsWidth * weightsHeight +
- h * weightsWidth +
- w;
+ cInput = cOut / depthMultiplier;
+ weightsIdx = w * outputChannels + cOut +
+ h * weightsWidth * outputChannels;
}
else if (convDescriptor.m_DataLayout == DataLayout::NHWC)
{
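For readers checking the new depthwise indexing: the expression introduced above is the standard row-major offset of element (0, h, w, cOut) in the [1, H, W, Cout] layout, with cInput = cOut / depthMultiplier recovering the input channel a given output channel derives from. A hedged restatement as a standalone helper (the name is hypothetical, the expression is the patch's, with terms regrouped):

unsigned int DepthwiseWeightsIdx(unsigned int h, unsigned int w, unsigned int cOut,
                                 unsigned int weightsWidth, unsigned int outputChannels)
{
    // Equivalent to the patch expression:
    //   w * outputChannels + cOut + h * weightsWidth * outputChannels
    return (h * weightsWidth + w) * outputChannels + cOut;
}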
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index 581c621a16..b07e3b80a5 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -1149,7 +1149,7 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");
- layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({1, 2, 4, 4}, DataType)); // [ M, I, H, W ]
+ layer->m_Weight = std::make_unique<ScopedTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType)); // [ 1, H, W, I*M ]
layer->m_Weight->Allocate();
// Creates extra layers.
@@ -1181,7 +1181,7 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
CHECK(queueDescriptor.m_Inputs.size() == 1);
CHECK(queueDescriptor.m_Outputs.size() == 1);
- CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 2, 4, 4}, DataType)));
+ CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType)));
// Returns so we can do extra, backend-specific tests.
return workload;
diff --git a/src/armnn/test/InferOutputTests.hpp b/src/armnn/test/InferOutputTests.hpp
index b8276de80c..6e2676ec8e 100644
--- a/src/armnn/test/InferOutputTests.hpp
+++ b/src/armnn/test/InferOutputTests.hpp
@@ -518,7 +518,7 @@ void DepthwiseConvolution2dInferOutputShapeTest()
armnn::TensorShape inputShape(4, inputSize.data());
shapes.push_back(inputShape);
- const std::vector<unsigned int> filterSize = { 1, 2, 3, 3};
+ const std::vector<unsigned int> filterSize = { 1, 3, 3, 2 };
armnn::TensorShape filterShape(4, filterSize.data());
shapes.push_back(filterShape);
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index e68546c9dd..d4e2d499d5 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -340,7 +340,7 @@ TEST_CASE("DepthwiseConv2dValidateTensorShapesFromInputs")
{
Graph graph;
const unsigned int inputShape[] = { 1, 2, 3, 3 };
- const unsigned int weightsShape[] = { 1, 2, 3, 3 };
+ const unsigned int weightsShape[] = { 1, 3, 3, 2 };
const unsigned int outputShape[] = { 1, 2, 1, 1 };
CreateDepthwiseConvolution2dGraph(graph, inputShape, weightsShape, outputShape);
@@ -351,7 +351,7 @@ TEST_CASE("DepthwiseConv2dValidateTensorShapesFromInputsNhwc")
{
Graph graph;
const unsigned int inputShape[] = { 1, 3, 3, 2 };
- const unsigned int weightsShape[] = { 1, 2, 3, 3 };
+ const unsigned int weightsShape[] = { 1, 3, 3, 2 };
const unsigned int outputShape[] = { 1, 1, 1, 2 };
CreateDepthwiseConvolution2dGraph(graph, inputShape, weightsShape, outputShape, DataLayout::NHWC);
diff --git a/src/armnn/test/optimizations/FoldPadTests.cpp b/src/armnn/test/optimizations/FoldPadTests.cpp
index 7b4ac4170f..11f09e80e0 100644
--- a/src/armnn/test/optimizations/FoldPadTests.cpp
+++ b/src/armnn/test/optimizations/FoldPadTests.cpp
@@ -687,7 +687,7 @@ TEST_CASE("FoldPadLayerIntoDepthwiseConv2dLayer_ExecuteInferenceWithAndWithoutOp
// avoided. The output tensors of each should match.
const unsigned int inputShape[] = {1, 4, 4, 3}; // NHWCin
const unsigned int paddedShape[] = {1, 6, 6, 3};
- const unsigned int weightsShape[] = {4, 3, 2, 2}; // MCinHW
+ const unsigned int weightsShape[] = {1, 2, 2, 12}; // 1HWCout
const unsigned int outputShape[] = {1, 5, 5, 12}; // NHWCout
std::vector<float> inputData({2.0f, 2.0f, 6.0f, 6.0f,
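The shape change in this test shows the mapping between the two layouts concretely: the old MCinHW shape {4, 3, 2, 2} (M = 4, Cin = 3) becomes {1, 2, 2, 12}, since the output-channel axis carries Cout = Cin * M. A minimal check, for illustration only:

// Old layout [M, Cin, H, W] = {4, 3, 2, 2}; new layout [1, H, W, Cout] = {1, 2, 2, 12}.
constexpr unsigned int M = 4, Cin = 3, H = 2, W = 2;
constexpr unsigned int Cout = Cin * M;                       // 12
constexpr unsigned int newWeightsShape[4] = {1, H, W, Cout}; // matches the test above
static_assert(Cout == 12, "output channels = Cin * M");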
diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp
index 9e332136f6..35b5bbc2da 100644
--- a/src/armnn/test/optimizations/FuseActivationTests.cpp
+++ b/src/armnn/test/optimizations/FuseActivationTests.cpp
@@ -81,9 +81,9 @@ public:
using LayerType = DepthwiseConvolution2dLayer;
static const bool isElementWise = false;
- static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
- static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // NHWCout
- static TensorShape GetWeightsShape() { return TensorShape( {4, 3, 2, 2}); } // MCinHW
+ static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // [N,H,W,Cin]
+ static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout]
+ static TensorShape GetWeightsShape() { return TensorShape( {1, 2, 2, 12}); } // [1,H,W,Cout]
constexpr static const unsigned int inputSize = 48; //batchIn * heightIn * widthIn * channelIn;
constexpr static const unsigned int outputSize = 108; //batchOut * heightOut * widthOut * channelOut;
diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
index 671f565054..20d2940b81 100644
--- a/src/armnn/test/optimizations/FuseBatchNormTests.cpp
+++ b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
@@ -90,12 +90,12 @@ INetworkPtr CreatNetwork(bool depthwise, bool preventFusing)
if (depthwise)
{
- //M Cin H W
- weightsDimensionSizes[0] = 4;
- weightsDimensionSizes[1] = 3;
+ // [1, H, W, Cout]
+ weightsDimensionSizes[0] = 1;
+ weightsDimensionSizes[1] = 2;
weightsDimensionSizes[2] = 2;
- weightsDimensionSizes[3] = 2;
- outputDimensionSizes[3] = weightsDimensionSizes[0] * weightsDimensionSizes[1];
+ weightsDimensionSizes[3] = 12;
+ outputDimensionSizes[3] = weightsDimensionSizes[3];
}
const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout
@@ -295,7 +295,7 @@ TEST_CASE("FuseBatchNormIntoDepthwiseConv2DFloat32Test")
TEST_CASE("FuseBatchNormIntoDepthwiseConv2DFloat16Test")
{
- FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float16>(true, 0.1f,armnn::Compute::CpuRef);
+ FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float16>(true, 0.2f,armnn::Compute::CpuRef);
}
#endif