From 080d45d73c03830cb80b223fd64c546e84d8337a Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Fri, 10 Nov 2023 17:11:53 +0000
Subject: MLCE-1138 Issue with Delegate supporting FP16 models

* Fixed issue where backends were asked to support FP16 layers that would
  be optimized out.
* Fixed issue where backends were asked to support non-constant filter and
  bias tensors when those tensors would be replaced by constant tensors
  during optimization.

Signed-off-by: Mike Kelly
Change-Id: Ib54b9cb99d5014e27172841a665daf57d1d5b23d
---
 delegate/classic/src/Convolution.hpp     |  53 ++++++++++++++--
 delegate/classic/src/Quantization.hpp    |  27 +++++----
 delegate/classic/src/SharedFunctions.cpp |  77 ++++++++++++++++++++++++
 delegate/classic/src/SharedFunctions.hpp |   6 ++
 delegate/opaque/src/Convolution.hpp      |  51 ++++++++++++++--
 delegate/opaque/src/Quantization.hpp     |  26 +++++---
 delegate/opaque/src/SharedFunctions.cpp  | 100 +++++++++++++++++++++++++++++++
 delegate/opaque/src/SharedFunctions.hpp  |   6 ++
 8 files changed, 319 insertions(+), 27 deletions(-)
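Background for reviewers: FP16 models are commonly stored with FP16 constant weights feeding a
DEQUANTIZE node, and the Arm NN optimizer folds a DEQUANTIZE whose input is constant into a single
Constant layer holding the dequantized values. The decision rule the delegate now applies before
querying backend support is sketched below; this is an illustration only, not part of the patch,
and every name in it is hypothetical.

    // Minimal sketch of the rule: a tensor counts as constant for backend
    // support queries if TfLite already marks it constant, or if it is
    // produced by a Dequantize whose own input is constant (the optimizer
    // folds Constant + Dequantize into one Constant layer).
    enum class Producer { None, Dequantize, Other };

    struct TensorDesc
    {
        bool isConstant;              // reported constant by TfLite
        Producer producedBy;          // node writing this tensor, if any
        bool producerInputIsConstant; // constness of that node's first input
    };

    bool WillBeConstantAfterOptimization(const TensorDesc& t)
    {
        return t.isConstant ||
               (t.producedBy == Producer::Dequantize && t.producerInputIsConstant);
    }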
diff --git a/delegate/classic/src/Convolution.hpp b/delegate/classic/src/Convolution.hpp
index cf0134ec1f..71ecd4c97a 100644
--- a/delegate/classic/src/Convolution.hpp
+++ b/delegate/classic/src/Convolution.hpp
@@ -56,7 +56,6 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     {
         return kTfLiteError;
     }
-
     const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
     if (!IsValid(tfLiteContext, tfLiteFilterTensor, operatorCode, nodeIndex))
     {
@@ -86,6 +85,7 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     if(biasEnabled)
     {
         const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+
         if (!IsValid(tfLiteContext, tfLiteBiasTensor, operatorCode, nodeIndex))
         {
             return kTfLiteError;
         }
@@ -115,6 +115,27 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_SUPPORT_FUNC("CONV2D",
                                    tfLiteContext,
@@ -125,8 +146,8 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
                                    inputTensorInfo,
                                    outputTensorInfo,
                                    descriptor,
-                                   filterTensorInfo,
-                                   optionalBiasInfo);
+                                   filterTensorInfoCopy,
+                                   optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
@@ -480,6 +501,28 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, tfLiteNode->inputs->data[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_SUPPORT_FUNC("DEPTHWISE_CONV2D",
                                    tfLiteContext,
@@ -490,8 +533,8 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
                                    inputTensorInfo,
                                    outputTensorInfo,
                                    descriptor,
-                                   filterTensorInfo,
-                                   armnn::Optional<armnn::TensorInfo>(biasTensorInfo));
+                                   filterTensorInfoCopy,
+                                   optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
diff --git a/delegate/classic/src/Quantization.hpp b/delegate/classic/src/Quantization.hpp
index 8291854383..7fcb9c7c44 100644
--- a/delegate/classic/src/Quantization.hpp
+++ b/delegate/classic/src/Quantization.hpp
@@ -23,7 +23,6 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
 {
     TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
     TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
-
     const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
     const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
     if (IsDynamicTensor(tfLiteInputTensor))
@@ -34,7 +33,6 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
                                      tfLiteDequantizeOperatorCode, nodeIndex);
         return kTfLiteError;
     }
-
     const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
     if (IsDynamicTensor(tfLiteOutputTensor))
     {
@@ -54,14 +52,23 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
     {
-        FORWARD_LAYER_SUPPORT_FUNC("DEQUANTIZE",
-                                   tfLiteContext,
-                                   IsDequantizeSupported,
-                                   delegateData.m_Backends,
-                                   isSupported,
-                                   setBackend,
-                                   inputTensorInfo,
-                                   outputTensorInfo);
+        // If this is a Dequantize with a Constant input then it will be replaced by a Constant layer containing
+        // the dequantized values during optimization, so there is no need to check backend support for it.
+        if (tflite::IsConstantTensor(&tfLiteInputTensor))
+        {
+            isSupported = true;
+        }
+        else
+        {
+            FORWARD_LAYER_SUPPORT_FUNC("DEQUANTIZE",
+                                       tfLiteContext,
+                                       IsDequantizeSupported,
+                                       delegateData.m_Backends,
+                                       isSupported,
+                                       setBackend,
+                                       inputTensorInfo,
+                                       outputTensorInfo);
+        }
     };
 
     if (!delegateData.m_Network)
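The two convolution hunks above repeat one pattern: copy the cached TensorInfo, mark the copy
constant for the duration of the support query, and leave the original untouched. A condensed
sketch of that pattern follows; it uses only the armnn::TensorInfo API seen above, and
willFoldToConst stands in for the result of WillInputBeOptimizedToConst.

    #include <armnn/Tensor.hpp>

    // Condensed from the hunks above: take a copy so the cached info is never
    // mutated, and mark it constant only for the backend support query.
    armnn::TensorInfo MarkConstForQuery(const armnn::TensorInfo& info, bool willFoldToConst)
    {
        armnn::TensorInfo copy(info);
        copy.SetConstant(info.IsConstant() || willFoldToConst);
        return copy;
    }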
diff --git a/delegate/classic/src/SharedFunctions.cpp b/delegate/classic/src/SharedFunctions.cpp
index bcff3a1dd0..53136b521e 100644
--- a/delegate/classic/src/SharedFunctions.cpp
+++ b/delegate/classic/src/SharedFunctions.cpp
@@ -110,6 +110,83 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
     return isSupported ?
            kTfLiteOk : kTfLiteError;
 }
 
+TfLiteNode* GetNodeConnectedToInput(TfLiteContext* tfLiteContext,
+                                    int32_t& connectedIndex,
+                                    int32_t inputIdx)
+{
+    TfLiteIntArray* executionPlan = nullptr;
+    if (tfLiteContext->GetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
+    {
+        TF_LITE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnDelegate: Unable to get graph execution plan.");
+        return nullptr;
+    }
+
+    for (int i = 0; i < executionPlan->size; ++i)
+    {
+        connectedIndex = executionPlan->data[i];
+
+        // Walk the execution plan to find the node, if any, that produces the tensor at inputIdx
+        TfLiteNode* connectedNode = nullptr;
+        TfLiteRegistration* tfLiteRegistration = nullptr;
+        if (tfLiteContext->GetNodeAndRegistration(
+                tfLiteContext, connectedIndex, &connectedNode, &tfLiteRegistration) != kTfLiteOk)
+        {
+            TF_LITE_KERNEL_LOG(tfLiteContext,
+                               "TfLiteArmnnDelegate: Unable to get node and registration for node %d.",
+                               connectedIndex);
+            continue;
+        }
+        for (int j = 0; j < connectedNode->outputs->size; ++j)
+        {
+            if (connectedNode->outputs->data[j] == inputIdx)
+            {
+                return connectedNode;
+            }
+        }
+    }
+    // No node found so set connectedIndex to -1
+    connectedIndex = -1;
+    return nullptr;
+}
+
+bool WillInputBeOptimizedToConst(TfLiteContext* tfLiteContext, int32_t inputIdx)
+{
+    int32_t connectedIndex;
+    TfLiteNode* connectedNode = GetNodeConnectedToInput(tfLiteContext, connectedIndex, inputIdx);
+
+    if (connectedNode)
+    {
+        TfLiteRegistration* tfLiteRegistration = nullptr;
+
+        if (tfLiteContext->GetNodeAndRegistration(tfLiteContext, connectedIndex, &connectedNode, &tfLiteRegistration)
+            == kTfLiteOk)
+        {
+            switch (tfLiteRegistration->builtin_code)
+            {
+                case kTfLiteBuiltinDequantize:
+                {
+                    if (connectedNode->inputs->size >= 1)
+                    {
+                        const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+                        const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[connectedNode->inputs->data[0]];
+
+                        // If the input to the Dequantize is a Constant then both that Constant layer and the Dequantize
+                        // layer will be replaced by a single Constant layer containing the dequantized values.
+                        if (tflite::IsConstantTensor(&tfLiteInputTensor))
+                        {
+                            return true;
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                }
+            }
+        }
+    }
+    return false;
+}
 
 } // namespace armnnDelegate
diff --git a/delegate/classic/src/SharedFunctions.hpp b/delegate/classic/src/SharedFunctions.hpp
index b03a63ded9..78aa3fef0e 100644
--- a/delegate/classic/src/SharedFunctions.hpp
+++ b/delegate/classic/src/SharedFunctions.hpp
@@ -21,5 +21,11 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
                                              const armnn::TensorInfo& outputInfo,
                                              TfLiteFusedActivation activationType);
 
+TfLiteNode* GetNodeConnectedToInput(TfLiteContext* tfLiteContext,
+                                    int32_t& connectedIndex,
+                                    int32_t inputIdx);
+
+bool WillInputBeOptimizedToConst(TfLiteContext* tfLiteContext, int32_t inputIdx);
+
 } // namespace armnnDelegate
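A hypothetical caller, mirroring how the convolution visitors above combine the two new helpers
for a Conv2d filter input; this wrapper is illustrative only and is not part of the patch.

    // The filter is treated as constant if TfLite already says so, or if its
    // producer is a Dequantize-of-a-Constant that optimization will fold away.
    bool FilterWillBeConst(TfLiteContext* tfLiteContext,
                           TfLiteNode* tfLiteNode,
                           const armnn::TensorInfo& filterTensorInfo)
    {
        return filterTensorInfo.IsConstant() ||
               armnnDelegate::WillInputBeOptimizedToConst(tfLiteContext,
                                                          tfLiteNode->inputs->data[1]);
    }

Note that GetNodeConnectedToInput is a linear scan of the execution plan per query, but it runs
only while the delegate is deciding which nodes to claim, so the cost is paid once per node at
graph preparation time rather than per inference.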
diff --git a/delegate/opaque/src/Convolution.hpp b/delegate/opaque/src/Convolution.hpp
index 384c62b678..e4393e7bb0 100644
--- a/delegate/opaque/src/Convolution.hpp
+++ b/delegate/opaque/src/Convolution.hpp
@@ -138,6 +138,27 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("CONV2D",
                                           tfLiteContext,
@@ -148,8 +169,8 @@ TfLiteStatus VisitConv2dOperator(DelegateData& delegateData,
                                           inputTensorInfo,
                                           outputTensorInfo,
                                           descriptor,
-                                          filterTensorInfo,
-                                          optionalBiasInfo);
+                                          filterTensorInfoCopy,
+                                          optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
@@ -339,6 +360,28 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     if (!delegateData.m_Network)
     {
+        bool filterIsConst = filterTensorInfo.IsConstant();
+
+        if (!filterIsConst)
+        {
+            filterIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[1]);
+        }
+        armnn::TensorInfo filterTensorInfoCopy(filterTensorInfo);
+        filterTensorInfoCopy.SetConstant(filterIsConst);
+
+        armnn::Optional<armnn::TensorInfo> optionalBiasInfoCopy(biasTensorInfo);
+
+        if (biasEnabled)
+        {
+            bool biasIsConst = biasTensorInfo.IsConstant();
+
+            if (!biasIsConst)
+            {
+                biasIsConst = WillInputBeOptimizedToConst(tfLiteContext, inputTensors[2]);
+            }
+            optionalBiasInfoCopy.value().SetConstant(biasIsConst);
+        }
+
         bool isSupported = false;
         FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("DEPTHWISE_CONV2D",
                                           tfLiteContext,
@@ -349,8 +392,8 @@ TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData,
                                           inputTensorInfo,
                                           outputTensorInfo,
                                           descriptor,
-                                          filterTensorInfo,
-                                          armnn::Optional<armnn::TensorInfo>(biasTensorInfo));
+                                          filterTensorInfoCopy,
+                                          optionalBiasInfoCopy);
         return isSupported ?
                kTfLiteOk : kTfLiteError;
     }
diff --git a/delegate/opaque/src/Quantization.hpp b/delegate/opaque/src/Quantization.hpp
index d7f5c5c73f..e2e5f7618d 100644
--- a/delegate/opaque/src/Quantization.hpp
+++ b/delegate/opaque/src/Quantization.hpp
@@ -31,6 +31,7 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
     }
 
     const TfLiteOpaqueTensor* tfLiteInputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, inputTensors[0]);
+
     if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
     {
         return kTfLiteError;
@@ -63,14 +64,23 @@ TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData,
     armnn::BackendId setBackend;
     auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
     {
-        FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("DEQUANTIZE",
-                                          tfLiteContext,
-                                          IsDequantizeSupported,
-                                          delegateData.m_Backends,
-                                          isSupported,
-                                          setBackend,
-                                          inputTensorInfo,
-                                          outputTensorInfo);
+        // If this is a Dequantize with a Constant input then it will be replaced by a Constant layer containing
+        // the dequantized values during optimization, so there is no need to check backend support for it.
+        if (IsConstantTensor(tfLiteInputTensor))
+        {
+            isSupported = true;
+        }
+        else
+        {
+            FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("DEQUANTIZE",
+                                              tfLiteContext,
+                                              IsDequantizeSupported,
+                                              delegateData.m_Backends,
+                                              isSupported,
+                                              setBackend,
+                                              inputTensorInfo,
+                                              outputTensorInfo);
+        }
     };
 
     if (!delegateData.m_Network)
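The opaque variant in the next diff cannot dereference the node struct directly the way the
classic delegate does; node inputs and outputs are reachable only through the TfLite C accessor
functions. A minimal sketch of that access pattern, with error logging elided and the helper
name invented for illustration:

    #include <tensorflow/lite/c/c_api_opaque.h>

    // Classic API: connectedNode->outputs->data[j] can be read directly.
    // Opaque API: the node struct is hidden, so accessors must be used.
    bool NodeProducesTensor(TfLiteOpaqueNode* node, int tensorIdx)
    {
        const int* outputTensors = nullptr;
        int numOutputs = 0;
        if (TfLiteOpaqueNodeOutputs(node, &outputTensors, &numOutputs) != kTfLiteOk)
        {
            return false;
        }
        for (int j = 0; j < numOutputs; ++j)
        {
            if (outputTensors[j] == tensorIdx)
            {
                return true;
            }
        }
        return false;
    }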
diff --git a/delegate/opaque/src/SharedFunctions.cpp b/delegate/opaque/src/SharedFunctions.cpp
index 93eb143bd0..0a0c630697 100644
--- a/delegate/opaque/src/SharedFunctions.cpp
+++ b/delegate/opaque/src/SharedFunctions.cpp
@@ -100,5 +100,105 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
     return isSupported ?
            kTfLiteOk : kTfLiteError;
 }
 
+TfLiteOpaqueNode* GetNodeConnectedToInput(TfLiteOpaqueContext* tfLiteContext,
+                                          int32_t& connectedIndex,
+                                          int32_t inputIdx)
+{
+    TfLiteIntArray* executionPlan = nullptr;
+    if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
+    {
+        TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(tfLiteContext,
+                                        "TfLiteArmnnOpaqueDelegate: Unable to get graph execution plan.");
+        return nullptr;
+    }
+
+    for (int i = 0; i < executionPlan->size; ++i)
+    {
+        connectedIndex = executionPlan->data[i];
+
+        // Walk the execution plan to find the node, if any, that produces the tensor at inputIdx
+        TfLiteOpaqueNode* connectedNode = nullptr;
+        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;
+        if (TfLiteOpaqueContextGetNodeAndRegistration(
+                tfLiteContext, connectedIndex, &connectedNode, &tfLiteRegistration) != kTfLiteOk)
+        {
+            TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(tfLiteContext,
+                                            "TfLiteArmnnOpaqueDelegate: Unable to get node and registration for node "
+                                            "%d.", connectedIndex);
+            continue;
+        }
+        int numOutputs = 0;
+        const int* outputTensors;
+
+        if (TfLiteOpaqueNodeOutputs(connectedNode, &outputTensors, &numOutputs) != kTfLiteOk)
+        {
+            TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
+                    tfLiteContext,
+                    "TfLiteArmnnOpaqueDelegate: Unable to gather output tensor indices from node #%d: ",
+                    connectedIndex);
+            continue;
+        }
+
+        for (int j = 0; j < numOutputs; ++j)
+        {
+            if (outputTensors[j] == inputIdx)
+            {
+                return connectedNode;
+            }
+        }
+    }
+    // No node found so set connectedIndex to -1
+    connectedIndex = -1;
+    return nullptr;
+}
+
+bool WillInputBeOptimizedToConst(TfLiteOpaqueContext* tfLiteContext, int32_t inputIdx)
+{
+    int32_t connectedIndex;
+    TfLiteOpaqueNode* connectedNode = GetNodeConnectedToInput(tfLiteContext, connectedIndex, inputIdx);
+
+    if (connectedNode)
+    {
+        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;
+
+        if (TfLiteOpaqueContextGetNodeAndRegistration(tfLiteContext, connectedIndex, &connectedNode,
+                                                      &tfLiteRegistration) == kTfLiteOk)
+        {
+            switch (TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration))
+            {
+                case kTfLiteBuiltinDequantize:
+                {
+                    auto numInputs = TfLiteOpaqueNodeNumberOfInputs(connectedNode);
+                    if (numInputs >= 1)
+                    {
+                        const int* inputTensors;
+                        if (TfLiteOpaqueNodeInputs(connectedNode, &inputTensors, &numInputs) != kTfLiteOk)
+                        {
+                            TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
+                                    tfLiteContext,
+                                    "TfLiteArmnnOpaqueDelegate: Unable to gather input tensor indices from node #%d: ",
+                                    connectedIndex);
+                            return false; // Cannot inspect the inputs, so do not assume it becomes constant
+                        }
+                        const TfLiteOpaqueTensor* tfLiteInputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext,
+                                                                                                         inputTensors[0]);
+
+                        // If the input to the Dequantize is a Constant then both that Constant layer and the Dequantize
+                        // layer will be replaced by a single Constant layer containing the dequantized values.
+                        if (IsConstantTensor(tfLiteInputTensor))
+                        {
+                            return true;
+                        }
+                    }
+                    break;
+                }
+                default:
+                {
+                }
+            }
+        }
+    }
+    return false;
+}
+
 } // namespace armnnDelegate
diff --git a/delegate/opaque/src/SharedFunctions.hpp b/delegate/opaque/src/SharedFunctions.hpp
index 72fbe6a332..4698a1e989 100644
--- a/delegate/opaque/src/SharedFunctions.hpp
+++ b/delegate/opaque/src/SharedFunctions.hpp
@@ -23,5 +23,11 @@ TfLiteStatus ValidateFusedActivationOperator(DelegateData& delegateData,
                                              const armnn::TensorInfo& outputInfo,
                                              TfLiteFusedActivation activationType);
 
+TfLiteOpaqueNode* GetNodeConnectedToInput(TfLiteOpaqueContext* tfLiteContext,
+                                          int32_t& connectedIndex,
+                                          int32_t inputIdx);
+
+bool WillInputBeOptimizedToConst(TfLiteOpaqueContext* tfLiteContext, int32_t inputIdx);
+
 } // namespace armnnOpaqueDelegate
-- 
cgit v1.2.1