IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]

* This change is necessary because tflite uses a [1,H,W,I*M] format and uses the I*M dimension for per-axis quantization. Our previous layout [M,I,H,W] can't handle the corresponding quantization scales.
* Updates Onnx-, TfLiteParser and TfliteDelegate
* Updates the CpuRef, CpuAcc and GpuAcc backends
* Adjusts unit tests
* Adds test to ensure models with old layout can still be read and executed
* Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W] which can be used by backend developers

!android-nn-driver:5553

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
author: Jan Eilers <jan.eilers@arm.com> 2021-06-02 12:01:25 +0100
committer: Jan Eilers <jan.eilers@arm.com> 2021-06-16 11:31:42 +0000
commit: 53ef79504b4c881c572735393c2eede5fa556c46 (patch)
tree: f6e0cd27c4d03075fa154074c5b12d7c8c3149f7 /src/armnnDeserializer/Deserializer.cpp
parent: 77fe76bfa8cb798943821d1f3e432c228e1cdee3 (diff)
download: armnn-53ef79504b4c881c572735393c2eede5fa556c46.tar.gz
1 files changed, 40 insertions, 7 deletions
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 976986eec3..7951589b53 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -927,6 +927,7 @@ IDeserializer::DeserializerImpl::FeatureVersions IDeserializer::DeserializerImpl
     if (graph->featureVersions())
     {
         versions.m_BindingIdScheme = graph->featureVersions()->bindingIdsScheme();
+        versions.m_WeightsLayoutScheme = graph->featureVersions()->weightsLayoutScheme();
     }
 
     return versions;
@@ -1420,19 +1421,51 @@ void IDeserializer::DeserializerImpl::ParseDepthwiseConvolution2d(GraphPtr graph
     descriptor.m_BiasEnabled = serializerDescriptor->biasEnabled();;
     descriptor.m_DataLayout  = ToDataLayout(serializerDescriptor->dataLayout());
 
-    armnn::ConstTensor weights = ToConstTensor(serializerLayer->weights());
-    armnn::ConstTensor biases;
+    IConnectableLayer* layer;
 
     armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
     if (descriptor.m_BiasEnabled)
     {
-        biases = ToConstTensor(serializerLayer->biases());
+        armnn::ConstTensor biases = ToConstTensor(serializerLayer->biases());
         optionalBiases = armnn::Optional<armnn::ConstTensor>(biases);
     }
-    IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                                         weights,
-                                                                         optionalBiases,
-                                                                         layerName.c_str());
+
+    armnn::ConstTensor weights = ToConstTensor(serializerLayer->weights());
+    // The data layout for weights in ArmNN used to be [M,I,H,W] but now it's changed to [1,H,W,I*M]
+    // When reading older flatbuffer files we need to add a permutation to get to the new layout.
+    if (this->GetFeatureVersions(graph).m_WeightsLayoutScheme <= 0)
+    {
+        // Permute weights  [ M, I, H, W ] --> [ 1, H, W, I*M ]
+        // Step1: [ M, I, H, W ] --> [ H, W, I, M]
+        PermutationVector permutationVector = { 3, 2, 0, 1 };
+        armnn::TensorInfo weightsInfo = weights.GetInfo();
+        std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightsInfo.GetNumBytes()]);
+        weightsInfo = armnnUtils::Permuted(weightsInfo, permutationVector);
+        armnnUtils::Permute(weightsInfo.GetShape(), permutationVector,
+                            weights.GetMemoryArea(), permuteBuffer.get(),
+                            GetDataTypeSize(weightsInfo.GetDataType()));
+
+        // Step2: Reshape [ H, W, I, M] --> [ 1, H, W, I*M ]
+        auto weightsShape = weightsInfo.GetShape();
+        weightsInfo.SetShape({1,
+                              weightsShape[0],
+                              weightsShape[1],
+                              weightsShape[2]*weightsShape[3]});
+
+        armnn::ConstTensor weightsPermuted(weightsInfo, permuteBuffer.get());
+
+        layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                          weightsPermuted,
+                                                          optionalBiases,
+                                                          layerName.c_str());
+    }
+    else
+    {
+        layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                          weights,
+                                                          optionalBiases,
+                                                          layerName.c_str());
+    }
 
     armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
author	Jan Eilers <jan.eilers@arm.com>	2021-06-02 12:01:25 +0100
committer	Jan Eilers <jan.eilers@arm.com>	2021-06-16 11:31:42 +0000
commit	53ef79504b4c881c572735393c2eede5fa556c46 (patch)
tree	f6e0cd27c4d03075fa154074c5b12d7c8c3149f7 /src/armnnDeserializer/Deserializer.cpp
parent	77fe76bfa8cb798943821d1f3e432c228e1cdee3 (diff)
download	armnn-53ef79504b4c881c572735393c2eede5fa556c46.tar.gz