author     Sadik Armagan <sadik.armagan@arm.com>   2021-03-25 07:46:55 +0000
committer  Sadik Armagan <sadik.armagan@arm.com>   2021-03-25 07:46:55 +0000
commit     f0a6dec75832604d5ab18242dc216852821a8279 (patch)
tree       ff25e64c62c63975a54abd16a8bff744be70d7c0 /delegate
parent     16fb1a2d9c1d3d80c0f0b6ab549919fbabd2a0b9 (diff)
download   armnn-f0a6dec75832604d5ab18242dc216852821a8279.tar.gz
IVGCVSW-5736 and IVGCVSW-5743 'NonConstWeights: Update front-end and TfLiteDelegate support for FullyConnected Operator'
* Added front-end support for non-const weights for FULLY_CONNECTED operator
* Added FULLY_CONNECTED end-to-end test
* Updated FULLY_CONNECTED operator support in TfLite Arm NN Delegate for non-const weights
* Updated the version numbers

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: Iffa5b9aa9297aca4c02d923cce4636c88ac21faa
Diffstat (limited to 'delegate')
-rw-r--r--   delegate/include/Version.hpp                       2
-rw-r--r--   delegate/src/FullyConnected.hpp                  109
-rw-r--r--   delegate/src/test/FullyConnectedTest.cpp          33
-rw-r--r--   delegate/src/test/FullyConnectedTestHelper.hpp   101
4 files changed, 130 insertions, 115 deletions
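
At the graph level, the patch lets FULLY_CONNECTED weights (and bias) be supplied as network inputs instead of baked-in constants: the descriptor gains an m_ConstantWeights flag, AddFullyConnectedLayer is called with optional (possibly empty) ConstTensors, and non-const weights and bias are wired to input slots 1 and 2. A minimal sketch of that call pattern follows; the 'network', 'inputLayer', 'weightsLayer' and 'biasLayer' names are illustrative assumptions, not part of the patch.

    // Sketch only (assumed setup): 'network', 'inputLayer', 'weightsLayer' and
    // 'biasLayer' stand in for layers created elsewhere, e.g. via AddInputLayer().
    armnn::FullyConnectedDescriptor descriptor;
    descriptor.m_TransposeWeightMatrix = true;
    descriptor.m_BiasEnabled           = true;
    descriptor.m_ConstantWeights       = false;   // weights arrive as a layer input

    // With non-const weights, no ConstTensor is passed to the layer factory.
    armnn::Optional<armnn::ConstTensor> optionalWeights = armnn::EmptyOptional();
    armnn::Optional<armnn::ConstTensor> optionalBiases  = armnn::EmptyOptional();
    armnn::IConnectableLayer* fcLayer =
        network->AddFullyConnectedLayer(descriptor, optionalWeights, optionalBiases);

    // Slot 0: data, slot 1: weights, slot 2: bias (mirrors the delegate change below).
    inputLayer->GetOutputSlot(0).Connect(fcLayer->GetInputSlot(0));
    weightsLayer->GetOutputSlot(0).Connect(fcLayer->GetInputSlot(1));
    biasLayer->GetOutputSlot(0).Connect(fcLayer->GetInputSlot(2));
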
diff --git a/delegate/include/Version.hpp b/delegate/include/Version.hpp
index 8b831f92cf..99ee2ad60f 100644
--- a/delegate/include/Version.hpp
+++ b/delegate/include/Version.hpp
@@ -14,7 +14,7 @@ namespace armnnDelegate
// ArmNN Delegate version components
#define DELEGATE_MAJOR_VERSION 24
-#define DELEGATE_MINOR_VERSION 0
+#define DELEGATE_MINOR_VERSION 1
#define DELEGATE_PATCH_VERSION 0
/// DELEGATE_VERSION: "X.Y.Z"
diff --git a/delegate/src/FullyConnected.hpp b/delegate/src/FullyConnected.hpp
index 0a82286479..2b45c48a89 100644
--- a/delegate/src/FullyConnected.hpp
+++ b/delegate/src/FullyConnected.hpp
@@ -35,62 +35,27 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
- if(!IsValid(&tfLiteTensors[tfLiteNode->inputs->data[0]]))
+ if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
{
- TF_LITE_MAYBE_KERNEL_LOG(
- tfLiteContext,
- "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ",
- operatorCode, nodeIndex);
- return kTfLiteError;
- }
- if (IsDynamicTensor(tfLiteInputTensor))
- {
- TF_LITE_MAYBE_KERNEL_LOG(
- tfLiteContext,
- "TfLiteArmnnDelegate: Dynamic input tensors are not supported in node #%d: ",
- nodeIndex);
return kTfLiteError;
}
+
const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
- if(!IsValid(&tfLiteOutputTensor))
- {
- TF_LITE_MAYBE_KERNEL_LOG(
- tfLiteContext,
- "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ",
- operatorCode, nodeIndex);
- return kTfLiteError;
- }
- if (IsDynamicTensor(tfLiteOutputTensor))
+ if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
{
- TF_LITE_MAYBE_KERNEL_LOG(
- tfLiteContext,
- "TfLiteArmnnDelegate: Dynamic output tensors are not supported in node #%d: ",
- nodeIndex);
return kTfLiteError;
}
const TfLiteTensor& tfLiteWeightsTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
- if(!IsValid(&tfLiteWeightsTensor))
- {
- TF_LITE_MAYBE_KERNEL_LOG(
- tfLiteContext,
- "TfLiteArmnnDelegate: Invalid weights tensor in operator #%d node #%d: ",
- operatorCode, nodeIndex);
- return kTfLiteError;
- }
- if (IsDynamicTensor(tfLiteWeightsTensor))
+ if (!IsValid(tfLiteContext, tfLiteWeightsTensor, operatorCode, nodeIndex))
{
- TF_LITE_MAYBE_KERNEL_LOG(
- tfLiteContext,
- "TfLiteArmnnDelegate: Dynamic weight tensors are not supported in node #%d: ",
- nodeIndex);
return kTfLiteError;
}
const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+ armnn::TensorInfo weightsTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteWeightsTensor);
const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
- armnn::TensorInfo weightsTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteWeightsTensor);
// Fully Connected Layer accepts two dimensional weights input
int32_t weightsDimension = static_cast<int32_t>(weightsTensorInfo.GetNumDimensions());
if (weightsDimension != 2)
@@ -102,24 +67,23 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
return kTfLiteError;
}
+ bool isConstantWeights = tflite::IsConstantTensor(&tfLiteWeightsTensor);
+
armnn::TensorInfo biasTensorInfo;
if (biasEnabled)
{
const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
- if(!IsValid(&tfLiteBiasTensor))
+ if (!IsValid(tfLiteContext, tfLiteBiasTensor, operatorCode, nodeIndex))
{
- TF_LITE_MAYBE_KERNEL_LOG(
- tfLiteContext,
- "TfLiteArmnnDelegate: Invalid bias tensor in operator #%d node #%d: ",
- operatorCode, nodeIndex);
return kTfLiteError;
}
- if (IsDynamicTensor(tfLiteBiasTensor))
+ if ((isConstantWeights && !tflite::IsConstantTensor(&tfLiteBiasTensor))
+ || (!isConstantWeights && tflite::IsConstantTensor(&tfLiteBiasTensor)))
{
TF_LITE_MAYBE_KERNEL_LOG(
tfLiteContext,
- "TfLiteArmnnDelegate: Dynamic bias tensors are not supported in node #%d: ",
- nodeIndex);
+ "TfLiteArmnnDelegate: Weights and bias are not compatible"
+ " in operator #%d node #%d: ", operatorCode, nodeIndex);
return kTfLiteError;
}
biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor);
@@ -130,7 +94,6 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
}
armnn::TensorInfo reshapedTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
-
if (inputTensorInfo.GetNumDimensions() > 2)
{
// Calculate reshape to flatten to 2D [batch_size, input_size]
@@ -153,6 +116,7 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
armnn::FullyConnectedDescriptor descriptor;
descriptor.m_TransposeWeightMatrix = true;
descriptor.m_BiasEnabled = biasEnabled;
+ descriptor.m_ConstantWeights = isConstantWeights;
bool isSupported = false;
auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
@@ -175,27 +139,28 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
return isSupported ? kTfLiteOk : kTfLiteError;
}
- auto weightsTensor = CreateConstTensor(&tfLiteWeightsTensor,
- weightsTensorInfo,
- armnn::Optional<armnn::PermutationVector&>());
-
- armnn::IConnectableLayer* layer = nullptr;
- if (biasEnabled)
+ armnn::Optional<armnn::ConstTensor> optionalWeights = armnn::EmptyOptional();
+ armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
+ if(descriptor.m_ConstantWeights)
{
- const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
- auto biasTensor = CreateConstTensor(&tfLiteBiasTensor,
- biasTensorInfo,
- armnn::Optional<armnn::PermutationVector&>());
- layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor,
- weightsTensor,
- armnn::Optional<armnn::ConstTensor>(biasTensor));
- }
- else
- {
- layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor,
- weightsTensor,
- armnn::EmptyOptional());
+ auto weightsTensor = CreateConstTensor(&tfLiteWeightsTensor,
+ weightsTensorInfo,
+ armnn::Optional<armnn::PermutationVector&>());
+ optionalWeights = armnn::Optional<armnn::ConstTensor>(weightsTensor);
+
+ if (biasEnabled)
+ {
+ const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+ auto biasTensor = CreateConstTensor(&tfLiteBiasTensor,
+ biasTensorInfo,
+ armnn::Optional<armnn::PermutationVector&>());
+ optionalBiases = armnn::Optional<armnn::ConstTensor>(biasTensor);
+ }
}
+
+ armnn::IConnectableLayer* layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor,
+ optionalWeights,
+ optionalBiases);
ARMNN_ASSERT(layer != nullptr);
armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
@@ -215,6 +180,14 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData,
// Connect
delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(reshapeLayer->GetInputSlot(0));
reshapeLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ if (!descriptor.m_ConstantWeights)
+ {
+ delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(layer->GetInputSlot(1));
+ if (biasEnabled)
+ {
+ delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]]->Connect(layer->GetInputSlot(2));
+ }
+ }
delegateData.m_OutputSlotForNode[tfLiteNode->outputs->data[0]] = &outputSlot;
}
diff --git a/delegate/src/test/FullyConnectedTest.cpp b/delegate/src/test/FullyConnectedTest.cpp
index 018f7f5190..3bea250988 100644
--- a/delegate/src/test/FullyConnectedTest.cpp
+++ b/delegate/src/test/FullyConnectedTest.cpp
@@ -8,7 +8,7 @@
namespace
{
-void FullyConnectedFp32Test(std::vector<armnn::BackendId>& backends)
+void FullyConnectedFp32Test(std::vector<armnn::BackendId>& backends, bool constantWeights = true)
{
std::vector<int32_t> inputTensorShape { 1, 4, 1, 1 };
std::vector<int32_t> weightsTensorShape { 1, 4 };
@@ -30,10 +30,11 @@ void FullyConnectedFp32Test(std::vector<armnn::BackendId>& backends)
outputTensorShape,
inputValues,
expectedOutputValues,
- weightsData);
+ weightsData,
+ constantWeights);
}
-void FullyConnectedActicationTest(std::vector<armnn::BackendId>& backends)
+void FullyConnectedActicationTest(std::vector<armnn::BackendId>& backends, bool constantWeights = true)
{
std::vector<int32_t> inputTensorShape { 1, 4, 1, 1 };
std::vector<int32_t> weightsTensorShape { 1, 4 };
@@ -55,10 +56,11 @@ void FullyConnectedActicationTest(std::vector<armnn::BackendId>& backends)
outputTensorShape,
inputValues,
expectedOutputValues,
- weightsData);
+ weightsData,
+ constantWeights);
}
-void FullyConnectedInt8Test(std::vector<armnn::BackendId>& backends)
+void FullyConnectedInt8Test(std::vector<armnn::BackendId>& backends, bool constantWeights = true)
{
std::vector<int32_t> inputTensorShape { 1, 4, 2, 1 };
std::vector<int32_t> weightsTensorShape { 1, 4 };
@@ -82,7 +84,8 @@ void FullyConnectedInt8Test(std::vector<armnn::BackendId>& backends)
outputTensorShape,
inputValues,
expectedOutputValues,
- weightsData);
+ weightsData,
+ constantWeights);
}
TEST_SUITE("FullyConnected_GpuAccTests")
@@ -152,6 +155,24 @@ TEST_CASE ("FullyConnected_Activation_CpuRef_Test")
FullyConnectedActicationTest(backends);
}
+TEST_CASE ("FullyConnected_Weights_As_Inputs_FP32_CpuRef_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+ FullyConnectedFp32Test(backends, false);
+}
+
+TEST_CASE ("FullyConnected_Weights_As_Inputs_Int8_CpuRef_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+ FullyConnectedInt8Test(backends, false);
+}
+
+TEST_CASE ("FullyConnected_Weights_As_Inputs_Activation_CpuRef_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+ FullyConnectedActicationTest(backends, false);
+}
+
} // End of TEST_SUITE("FullyConnected_CpuRefTests")
} // anonymous namespace
\ No newline at end of file
diff --git a/delegate/src/test/FullyConnectedTestHelper.hpp b/delegate/src/test/FullyConnectedTestHelper.hpp
index 1b6ca941b8..37062c3400 100644
--- a/delegate/src/test/FullyConnectedTestHelper.hpp
+++ b/delegate/src/test/FullyConnectedTestHelper.hpp
@@ -5,6 +5,8 @@
#pragma once
+#include "TestUtils.hpp"
+
#include <armnn_delegate.hpp>
#include <flatbuffers/flatbuffers.h>
@@ -25,8 +27,9 @@ std::vector<char> CreateFullyConnectedTfLiteModel(tflite::TensorType tensorType,
const std::vector <int32_t>& inputTensorShape,
const std::vector <int32_t>& weightsTensorShape,
const std::vector <int32_t>& biasTensorShape,
- const std::vector <int32_t>& outputTensorShape,
- const std::vector <T>& weightsData,
+ std::vector <int32_t>& outputTensorShape,
+ std::vector <T>& weightsData,
+ bool constantWeights = true,
float quantScale = 1.0f,
int quantOffset = 0,
float outputQuantScale = 2.0f,
@@ -36,26 +39,38 @@ std::vector<char> CreateFullyConnectedTfLiteModel(tflite::TensorType tensorType,
flatbuffers::FlatBufferBuilder flatBufferBuilder;
std::array<flatbuffers::Offset<tflite::Buffer>, 3> buffers;
buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}));
- buffers[1] = CreateBuffer(flatBufferBuilder,
- flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(weightsData.data()),
- sizeof(T) * weightsData.size()));
auto biasTensorType = ::tflite::TensorType_FLOAT32;
if (tensorType == ::tflite::TensorType_INT8)
{
biasTensorType = ::tflite::TensorType_INT32;
- std::vector<int32_t> biasData = { 10 };
- buffers[2] = CreateBuffer(flatBufferBuilder,
- flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(biasData.data()),
- sizeof(int32_t) * biasData.size()));
+ }
+ if (constantWeights)
+ {
+ buffers[1] = CreateBuffer(flatBufferBuilder,
+ flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(weightsData.data()),
+ sizeof(T) * weightsData.size()));
+ if (tensorType == ::tflite::TensorType_INT8)
+ {
+ std::vector<int32_t> biasData = { 10 };
+ buffers[2] = CreateBuffer(flatBufferBuilder,
+ flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(biasData.data()),
+ sizeof(int32_t) * biasData.size()));
+
+ }
+ else
+ {
+ std::vector<float> biasData = { 10 };
+ buffers[2] = CreateBuffer(flatBufferBuilder,
+ flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(biasData.data()),
+ sizeof(float) * biasData.size()));
+ }
}
else
{
- std::vector<float> biasData = { 10 };
- buffers[2] = CreateBuffer(flatBufferBuilder,
- flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(biasData.data()),
- sizeof(float) * biasData.size()));
+ buffers[1] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}));
+ buffers[2] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}));
}
auto quantizationParameters =
@@ -155,10 +170,11 @@ void FullyConnectedTest(std::vector<armnn::BackendId>& backends,
const std::vector <int32_t>& inputTensorShape,
const std::vector <int32_t>& weightsTensorShape,
const std::vector <int32_t>& biasTensorShape,
- const std::vector <int32_t>& outputTensorShape,
- const std::vector <T>& inputValues,
- const std::vector <T>& expectedOutputValues,
- const std::vector <T>& weightsData,
+ std::vector <int32_t>& outputTensorShape,
+ std::vector <T>& inputValues,
+ std::vector <T>& expectedOutputValues,
+ std::vector <T>& weightsData,
+ bool constantWeights = true,
float quantScale = 1.0f,
int quantOffset = 0)
{
@@ -171,10 +187,11 @@ void FullyConnectedTest(std::vector<armnn::BackendId>& backends,
biasTensorShape,
outputTensorShape,
weightsData,
+ constantWeights,
quantScale,
quantOffset);
-
const Model* tfLiteModel = GetModel(modelBuffer.data());
+
// Create TfLite Interpreters
std::unique_ptr<Interpreter> armnnDelegateInterpreter;
CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
@@ -191,25 +208,34 @@ void FullyConnectedTest(std::vector<armnn::BackendId>& backends,
// Create the ArmNN Delegate
armnnDelegate::DelegateOptions delegateOptions(backends);
std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
- theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
- armnnDelegate::TfLiteArmnnDelegateDelete);
+ theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+ armnnDelegate::TfLiteArmnnDelegateDelete);
CHECK(theArmnnDelegate != nullptr);
+
// Modify armnnDelegateInterpreter to use armnnDelegate
CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
// Set input data
- auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0];
- auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateInputId);
- for (unsigned int i = 0; i < inputValues.size(); ++i)
- {
- tfLiteDelageInputData[i] = inputValues[i];
- }
+ armnnDelegate::FillInput<T>(tfLiteInterpreter, 0, inputValues);
+ armnnDelegate::FillInput<T>(armnnDelegateInterpreter, 0, inputValues);
- auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0];
- auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateInputId);
- for (unsigned int i = 0; i < inputValues.size(); ++i)
+ if (!constantWeights)
{
- armnnDelegateInputData[i] = inputValues[i];
+ armnnDelegate::FillInput<T>(tfLiteInterpreter, 1, weightsData);
+ armnnDelegate::FillInput<T>(armnnDelegateInterpreter, 1, weightsData);
+
+ if (tensorType == ::tflite::TensorType_INT8)
+ {
+ std::vector <int32_t> biasData = {10};
+ armnnDelegate::FillInput<int32_t>(tfLiteInterpreter, 2, biasData);
+ armnnDelegate::FillInput<int32_t>(armnnDelegateInterpreter, 2, biasData);
+ }
+ else
+ {
+ std::vector<float> biasData = {10};
+ armnnDelegate::FillInput<float>(tfLiteInterpreter, 2, biasData);
+ armnnDelegate::FillInput<float>(armnnDelegateInterpreter, 2, biasData);
+ }
}
// Run EnqueWorkload
@@ -217,16 +243,11 @@ void FullyConnectedTest(std::vector<armnn::BackendId>& backends,
CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
// Compare output data
- auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
- auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateOutputId);
- auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
- auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateOutputId);
- for (size_t i = 0; i < expectedOutputValues.size(); i++)
- {
- CHECK(expectedOutputValues[i] == tfLiteDelageOutputData[i]);
- CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]);
- CHECK(tfLiteDelageOutputData[i] == armnnDelegateOutputData[i]);
- }
+ armnnDelegate::CompareOutputData<T>(tfLiteInterpreter,
+ armnnDelegateInterpreter,
+ outputTensorShape,
+ expectedOutputValues);
+ armnnDelegateInterpreter.reset(nullptr);
}
} // anonymous namespace
\ No newline at end of file