From a37ccb006ad0bc49c34ff0d1741fe04d9ca55d5b Mon Sep 17 00:00:00 2001
From: Ryan OShea <ryan.oshea3@arm.com>
Date: Tue, 11 Apr 2023 10:54:07 +0100
Subject: IVGCVSW-7564 Implement Cast operator for Opaque Delegate

 * Adds VisitCastOperator function to Redefine.hpp
 * Enables Cast Test for OpaqueUnitTests
 * Various fixes to the opaque delegate to allow the operator to run

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: I43d42eea5c987d6aed8a0f909a6bf583fddcc94e
---
 delegate/opaque/src/OpaqueDelegateUtils.hpp | 151 +++++++++++-----------------
 delegate/opaque/src/Redefine.hpp            |  96 ++++++++++++++++++
 delegate/opaque/src/armnn_delegate.cpp      |  69 +++++++------
 3 files changed, 195 insertions(+), 121 deletions(-)

(limited to 'delegate/opaque/src')
diff --git a/delegate/opaque/src/OpaqueDelegateUtils.hpp b/delegate/opaque/src/OpaqueDelegateUtils.hpp
index d4ef9ca9c5..688c683fa8 100644
--- a/delegate/opaque/src/OpaqueDelegateUtils.hpp
+++ b/delegate/opaque/src/OpaqueDelegateUtils.hpp
@@ -115,9 +115,9 @@ bool IsConstantTensor(const TfLiteOpaqueTensor* tfLiteTensor)
     return false;
 }
 
-bool IsDynamicTensor(const TfLiteOpaqueTensor& tfLiteTensor)
+bool IsDynamicTensor(const TfLiteOpaqueTensor* tfLiteTensor)
 {
-    auto tensorAllocationType = TfLiteOpaqueTensorGetAllocationType(&tfLiteTensor);
+    auto tensorAllocationType = TfLiteOpaqueTensorGetAllocationType(tfLiteTensor);
     if (tensorAllocationType == kTfLiteDynamic)
     {
         return true;
@@ -131,11 +131,11 @@ bool IsValid(const TfLiteOpaqueTensor* tfLiteTensor)
 }
 
 bool IsValid(TfLiteOpaqueContext* tfLiteContext,
-             const TfLiteOpaqueTensor& tfLiteTensor,
+             const TfLiteOpaqueTensor* tfLiteTensor,
              int32_t operatorCode,
              int32_t nodeIndex)
 {
-    if(!IsValid(&tfLiteTensor))
+    if(!IsValid(tfLiteTensor))
     {
         TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
                 tfLiteContext,
@@ -164,89 +164,52 @@ bool IsAffineQuantization(const TfLiteOpaqueTensor& tfLiteTensor)
     return false;
 }
 
-// Load input indices into array if found and validate.
-// This replaces node->inputs->data.
-TfLiteStatus GetInputIndices(const int* inputIndices,
-                             TfLiteOpaqueNode* tfLiteNode,
-                             TfLiteOpaqueContext* tfLiteContext,
-                             unsigned int numInputs)
-{
-    int actualNumInputs = 0;
-
-    TfLiteStatus status = TfLiteOpaqueNodeInputs(tfLiteNode, &inputIndices, &actualNumInputs);
-    if(status != kTfLiteOk)
-    {
-        TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
-                tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to gather input information from node.");
-        return kTfLiteError;
-    }
-
-    if (static_cast<unsigned int>(actualNumInputs) != numInputs)
-    {
-        TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
-                tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unexpected number of inputs (%d != %d) in node.",
-                actualNumInputs, numInputs);
-        return kTfLiteError;
-    }
-
-    return kTfLiteOk;
-}
-
-// Load output indices into array if found and validate.
-// This replaces node->outputs->data.
-TfLiteStatus GetOutputIndices(const int* outputIndices,
-                              TfLiteOpaqueNode* tfLiteNode,
-                              TfLiteOpaqueContext* tfLiteContext,
-                              unsigned int numOutputs)
-{
-    int actualNumOutputs = 0;
-
-    TfLiteStatus status = TfLiteOpaqueNodeOutputs(tfLiteNode, &outputIndices, &actualNumOutputs);
-    if(status != kTfLiteOk)
-    {
-        TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
-                tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to gather output information from node.");
-        return kTfLiteError;
-    }
-
-    if (static_cast<unsigned int>(actualNumOutputs) != numOutputs)
-    {
-        TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
-                tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unexpected number of outputs (%d != %d) in node.",
-                actualNumOutputs, numOutputs);
-        return kTfLiteError;
-    }
-
-    return kTfLiteOk;
-}
-
+// Connects the layer to the graph
 TfLiteStatus Connect(armnn::IConnectableLayer* layer,
                      TfLiteOpaqueContext* tfLiteContext,
                      TfLiteOpaqueNode* tfLiteNode,
                      armnnOpaqueDelegate::DelegateData& data)
 {
-    // Get array of indices, replaces node->inputs->data
-    const int* inputIndices = nullptr;
-    TfLiteStatus inputStatus = GetInputIndices(inputIndices, tfLiteNode, tfLiteContext, layer->GetNumInputSlots());
-    if(inputStatus != kTfLiteOk)
+    // Get array of input indices, inputIndexArray is set from the TfLiteOpaqueNodeInputs function
+    // This function turns inputIndexArray into an int array of indices. These indices point to the index of the
+    // tensors for each input slot in the node.
+    const int* inputIndexArray;
+    int numInputs;
+    if(TfLiteOpaqueNodeInputs(tfLiteNode, &inputIndexArray, &numInputs) != kTfLiteOk)
     {
         return kTfLiteError;
     }
-
-    // Connect the input slots
+    // numInputs is set from TfLiteOpaqueNodeInputs.
+    if(numInputs != static_cast<int>(layer->GetNumInputSlots()))
+    {
+        ARMNN_LOG(error) << "Layer: " << layer->GetName() << ": Expected number of input slots does not match actual "
+                                                          "number of input slots.";
+        return kTfLiteError;
+    }
+    // Connect the input slots.
+    // For each input slot, get the index of the opaque tensor that was allocated for it.
     for (unsigned int inputIndex = 0; inputIndex < layer->GetNumInputSlots(); ++inputIndex)
     {
-        if (data.m_OutputSlotForNode[inputIndices[inputIndex]] != nullptr)
+        if (data.m_OutputSlotForNode[inputIndexArray[inputIndex]] != nullptr)
         {
-            data.m_OutputSlotForNode[inputIndices[inputIndex]]->Connect(layer->GetInputSlot(inputIndex));
+            data.m_OutputSlotForNode[inputIndexArray[inputIndex]]->Connect(layer->GetInputSlot(inputIndex));
         }
     }
 
-    // Get array of indices, replaces node->outputs->data
-    const int* outputIndices = nullptr;
-    TfLiteStatus outputStatus = GetOutputIndices(outputIndices, tfLiteNode, tfLiteContext, layer->GetNumOutputSlots());
-    if(outputStatus != kTfLiteOk)
+    // Get array of output indices, outputIndexArray is set from the TfLiteOpaqueNodeOutputs function
+    // This function turns outputIndexArray into an int array of indices. These indices point to the tensors for
+    // each output slot in the node.
+    const int* outputIndexArray;
+    int numOutputs;
+    if(TfLiteOpaqueNodeOutputs(tfLiteNode, &outputIndexArray, &numOutputs) != kTfLiteOk)
+    {
+        return kTfLiteError;
+    }
+    // numOutputs is set from TfLiteOpaqueNodeOutputs.
+    if(numOutputs != static_cast<int>(layer->GetNumOutputSlots()))
     {
+        ARMNN_LOG(error) << "Layer: " << layer->GetName() << ": Expected number of output slots does not match actual "
+                                                             "number of output slots.";
         return kTfLiteError;
     }
 
@@ -254,7 +217,7 @@ TfLiteStatus Connect(armnn::IConnectableLayer* layer,
     for (unsigned int outputIndex = 0; outputIndex < layer->GetNumOutputSlots(); ++outputIndex)
     {
         armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex);
-        data.m_OutputSlotForNode[static_cast<unsigned long>(outputIndices[outputIndex])] = &outputSlot;
+        data.m_OutputSlotForNode[static_cast<unsigned long>(outputIndexArray[outputIndex])] = &outputSlot;
     }
 
     return kTfLiteOk;
@@ -334,13 +297,13 @@ TfLiteStatus FusedActivation(TfLiteOpaqueContext* tfLiteContext,
     ARMNN_ASSERT(activationLayer != nullptr);
     activationLayer->GetOutputSlot(0).SetTensorInfo(activationOutputInfo);
 
-    // Get array of indices, replaces node->outputs->data
-    const int* outputIndices = nullptr;
-    TfLiteStatus status = GetOutputIndices(outputIndices,
-                                           tfLiteNode,
-                                           tfLiteContext,
-                                           activationLayer->GetNumOutputSlots());
-    if(status != kTfLiteOk)
+    // Get array of output indices, outputIndexArray is set from the TfLiteOpaqueNodeOutputs function
+    // This function turns outputIndexArray into an int array of indices. These indices point to the tensors for
+    // each output slot in the node.
+    const int* outputIndexArray;
+    int numOutputs;
+    TfLiteStatus outputStatus = TfLiteOpaqueNodeOutputs(tfLiteNode, &outputIndexArray, &numOutputs);
+    if(outputStatus != kTfLiteOk)
     {
         return kTfLiteError;
     }
@@ -349,10 +312,10 @@ TfLiteStatus FusedActivation(TfLiteOpaqueContext* tfLiteContext,
     for (unsigned int outputIndex = 0; outputIndex < activationLayer->GetNumOutputSlots(); ++outputIndex)
     {
         data.m_OutputSlotForNode[static_cast<unsigned long>(
-                outputIndices[outputIndex])]->Connect(activationLayer->GetInputSlot(0));
+                outputIndexArray[outputIndex])]->Connect(activationLayer->GetInputSlot(0));
 
         armnn::IOutputSlot& outputSlot = activationLayer->GetOutputSlot(outputIndex);
-        data.m_OutputSlotForNode[static_cast<unsigned long>(outputIndices[outputIndex])] = &outputSlot;
+        data.m_OutputSlotForNode[static_cast<unsigned long>(outputIndexArray[outputIndex])] = &outputSlot;
     }
     return kTfLiteOk;
 }
@@ -586,11 +549,13 @@ armnn::ConstTensor* GetConstTensorForTfLiteTensor(const TfLiteOpaqueContext* tfL
 
 bool IsOptionalOperandPresent(TfLiteOpaqueNode* tfLiteNode, const int operandIndex)
 {
-    // Gather array of indices and it's length, replaces node->inputs->data[i] and node->inputs->size
-    const int* inputIndices = nullptr;
+    // Get array of input indices, inputIndexArray is set from the TfLiteOpaqueNodeInputs function
+    // This function turns inputIndexArray into an int array of indices. These indices point to the index of the
+    // tensors for each input slot in the node.
+    const int* inputIndexArray;
     int numInputs = 0;
 
-    TfLiteStatus status = TfLiteOpaqueNodeInputs(tfLiteNode, &inputIndices, &numInputs);
+    TfLiteStatus status = TfLiteOpaqueNodeInputs(tfLiteNode, &inputIndexArray, &numInputs);
     if(status != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to gather input information from node.");
@@ -598,7 +563,7 @@ bool IsOptionalOperandPresent(TfLiteOpaqueNode* tfLiteNode, const int operandInd
 
     // If the inputs array has fewer than operandIndex entries or if the entry at operandIndex has a value of -1 or
     // less then the input is not present.
-    if (numInputs > operandIndex && inputIndices[operandIndex] >= 0)
+    if (numInputs > operandIndex && inputIndexArray[operandIndex] >= 0)
     {
         return true;
     }
@@ -610,12 +575,16 @@ TfLiteStatus ProcessInputs(armnn::IConnectableLayer* layer,
                            TfLiteOpaqueContext* tfLiteContext,
                            TfLiteOpaqueNode* tfLiteNode)
 {
-    // Get array of indices, replaces node->inputs->data
-    const int* inputIndices = nullptr;
-    TfLiteStatus status = GetInputIndices(inputIndices, tfLiteNode, tfLiteContext, layer->GetNumInputSlots());
+    // Get array of input indices, inputIndexArray is set from the TfLiteOpaqueNodeInputs function
+    // This function turns inputIndexArray into an int array of indices. These indices point to the index of the
+    // tensors for each input slot in the node.
+    const int* inputIndexArray;
+    int numInputs = 0;
+
+    TfLiteStatus status = TfLiteOpaqueNodeInputs(tfLiteNode, &inputIndexArray, &numInputs);
     if(status != kTfLiteOk)
     {
-        return kTfLiteError;
+        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to gather input information from node.");
     }
 
     // Process input tensors
@@ -649,7 +618,7 @@ TfLiteStatus ProcessInputs(armnn::IConnectableLayer* layer,
             armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0);
             outputSlot.SetTensorInfo(inputTensorInfo);
 
-            delegateData.m_OutputSlotForNode[inputIndices[inputIndex]] = &outputSlot;
+            delegateData.m_OutputSlotForNode[inputIndexArray[inputIndex]] = &outputSlot;
         }
     }
     return kTfLiteOk;
diff --git a/delegate/opaque/src/Redefine.hpp b/delegate/opaque/src/Redefine.hpp
index e16969768e..7dd8561de4 100644
--- a/delegate/opaque/src/Redefine.hpp
+++ b/delegate/opaque/src/Redefine.hpp
@@ -2,3 +2,99 @@
 // Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+#pragma once
+
+#include <armnn/utility/IgnoreUnused.hpp>
+
+#include "OpaqueDelegateUtils.hpp"
+
+#include <tensorflow/lite/builtin_ops.h>
+#include <tensorflow/lite/c/builtin_op_data.h>
+#include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/minimal_logging.h>
+#include <numeric>
+
+namespace armnnOpaqueDelegate
+{
+
+// Handles a TfLite CAST node for the opaque delegate.
+// When delegateData.m_Network is null this only queries backend support for the cast; otherwise it adds an
+// ArmNN Cast layer, sets its output tensor info and connects it into the network.
+// Returns kTfLiteOk when the cast is supported / was added successfully, kTfLiteError otherwise.
+TfLiteStatus VisitCastOperator(DelegateData& delegateData,
+                               TfLiteOpaqueContext* tfLiteContext,
+                               TfLiteOpaqueNode* tfLiteNode,
+                               int nodeIndex,
+                               int32_t operatorCode)
+{
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+    int numInputs = 0;
+    const int* inputTensors;
+    if (TfLiteOpaqueNodeInputs(tfLiteNode, &inputTensors, &numInputs) != kTfLiteOk)
+    {
+        return kTfLiteError;
+    }
+
+    // Cast is expected to have a single input (see ValidateNumInputs above); numInputs-1 selects the last entry
+    const TfLiteOpaqueTensor*
+          tfLiteInputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, inputTensors[numInputs-1]);
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    int numOutputs = 0;
+    const int* outputTensors;
+    if (TfLiteOpaqueNodeOutputs(tfLiteNode, &outputTensors, &numOutputs) != kTfLiteOk)
+    {
+        return kTfLiteError;
+    }
+
+    // Cast is expected to have a single output (see ValidateNumOutputs above); numOutputs-1 selects the last entry
+    const TfLiteOpaqueTensor*
+          tfLiteOutputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, outputTensors[numOutputs-1]);
+    if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo  = GetTensorInfoForTfLiteOpaqueTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tfLiteOutputTensor, true);
+
+    bool             isSupported  = false;
+    armnn::BackendId setBackend;
+    auto             validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) {
+        FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("CAST", tfLiteContext, IsCastSupported,
+                                          delegateData.m_Backends, isSupported, setBackend,
+                                          inputTensorInfo, outInfo);
+    };
+
+    // A null m_Network means this call is only a support query: run the backend check and report the result
+    // without creating any layer.
+    // With a non-null m_Network the code below adds the layer to the network instead.
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    // Add a Cast layer
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddCastLayer();
+    // Check the layer exists before it is dereferenced (the assert previously ran after the first use)
+    ARMNN_ASSERT(layer != nullptr);
+    layer->SetBackendId(setBackend);
+
+    armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    // try to connect the Constant Inputs if there are any
+    if (ProcessInputs(layer, delegateData, tfLiteContext, tfLiteNode) != kTfLiteOk)
+    {
+        return kTfLiteError;
+    }
+
+    // Connect the layer's input and output slots into the graph
+    return Connect(layer, tfLiteContext, tfLiteNode, delegateData);
+}
+}
diff --git a/delegate/opaque/src/armnn_delegate.cpp b/delegate/opaque/src/armnn_delegate.cpp
index ee1a4ed211..04a4eae12e 100644
--- a/delegate/opaque/src/armnn_delegate.cpp
+++ b/delegate/opaque/src/armnn_delegate.cpp
@@ -106,11 +106,14 @@ ArmnnOpaqueDelegate::ArmnnOpaqueDelegate(armnnDelegate::DelegateOptions options)
     TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "TfLiteArmnnOpaqueDelegate: Created TfLite ArmNN delegate.");
 }
 
-TfLiteStatus DoPrepare(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueDelegate* tfLiteDelegate)
+TfLiteStatus DoPrepare(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueDelegate* tfLiteDelegate, void* data)
 {
+    // We are required to have the void* data parameter in the function signature, but we don't actually use it.
+    armnn::IgnoreUnused(data);
+
     TfLiteIntArray* supportedOperators =
             static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>
-                    (tfLiteDelegate->data_)->IdentifyOperatorsToDelegate(tfLiteContext);
+                    (TfLiteOpaqueDelegateGetData(tfLiteDelegate))->IdentifyOperatorsToDelegate(tfLiteContext);
     if(supportedOperators == nullptr)
     {
         return kTfLiteError;
@@ -142,7 +145,7 @@ TfLiteStatus DoPrepare(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueDelegate*
                         ArmnnSubgraph::Create(tfLiteContext,
                                               parameters,
                                               static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(
-                                                      parameters->delegate->data_)));
+                                                      parameters->delegate->opaque_delegate_builder->data)));
             }
     );
 
@@ -366,7 +369,7 @@ TfLiteStatus ArmnnSubgraph::AddOutputLayer(DelegateData& delegateData,
         const int32_t tensorId = outputs->data[i];
         const TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
 
-        if(!tensor)
+        if(!IsValid(tensor))
         {
             return kTfLiteError;
         }
@@ -411,8 +414,7 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteOpaqueContext* tfLiteContext,
     std::vector<armnn::BindingPointInfo> outputBindings;
 
     // Add input layer
-    auto status = AddInputLayer(delegateData, tfLiteContext, parameters->input_tensors, inputBindings);
-    if (status != kTfLiteOk)
+    if (AddInputLayer(delegateData, tfLiteContext, parameters->input_tensors, inputBindings) != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Inputs to the network!");
     }
@@ -440,8 +442,7 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteOpaqueContext* tfLiteContext,
                     << std::fixed << armnn::GetTimeDuration(parseStartTime).count() << " ms";
 
     // Add Output layer
-    status = AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings);
-    if (status != kTfLiteOk)
+    if (AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings) != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Outputs to the network!");
     }
@@ -544,24 +545,27 @@ TfLiteStatus ArmnnSubgraph::Prepare(TfLiteOpaqueContext* tfLiteContext)
 
 TfLiteStatus ArmnnSubgraph::Invoke(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode)
 {
-    // Prepare inputs
-    armnn::InputTensors inputTensors;
-    size_t inputIndex = 0;
-    const int* inputs;
+    // Get array of input indices, inputIndexArray is set from the TfLiteOpaqueNodeInputs function
+    // This function turns inputIndexArray into an int array of indices. These indices point to the tensors for
+    // each input slot in the node.
+    const int* inputIndexArray;
     int numInputs;
-    if(TfLiteOpaqueNodeInputs(tfLiteNode, &inputs, &numInputs) != kTfLiteOk)
+    if(TfLiteOpaqueNodeInputs(tfLiteNode, &inputIndexArray, &numInputs) != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph inputs!");
     }
+    // Prepare inputs
+    armnn::InputTensors inputTensors;
+    size_t inputIndex = 0;
     for (int inputIdx = 0; inputIdx < numInputs; inputIdx++)
     {
-        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, inputs[inputIdx]);
+        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, inputIndexArray[inputIdx]);
 
-        if(!tensor)
+        if(!IsValid(tensor))
         {
             return kTfLiteError;
         }
-
+        // If tensor is not read only
         if (TfLiteOpaqueTensorGetAllocationType(tensor) != kTfLiteMmapRo)
         {
             const armnn::BindingPointInfo& inputBinding = m_InputBindings[inputIndex];
@@ -574,29 +578,29 @@ TfLiteStatus ArmnnSubgraph::Invoke(TfLiteOpaqueContext* tfLiteContext, TfLiteOpa
         }
     }
 
-    // Prepare outputs
-    armnn::OutputTensors outputTensors;
-    size_t outputIndex = 0;
-    const int* outputs;
+    // Get array of output indices, outputIndexArray is set from the TfLiteOpaqueNodeOutputs function
+    // This function turns outputIndexArray into an int array of indices. These indices point to the tensors for
+    // each output slot in the node.
+    const int* outputIndexArray;
     int numOutputs;
-    if(TfLiteOpaqueNodeOutputs(tfLiteNode, &outputs, &numOutputs) != kTfLiteOk)
+    if(TfLiteOpaqueNodeOutputs(tfLiteNode, &outputIndexArray, &numOutputs) != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph outputs!");
     }
+    // Assign the tensors from the outputIndexArray to the armnn BindingPointInfo
+    armnn::OutputTensors outputTensors;
     for (int outputIdx = 0; outputIdx < numOutputs; outputIdx++)
     {
-        const armnn::BindingPointInfo& outputBinding = m_OutputBindings[outputIndex];
-        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, outputs[outputIdx]);
-
-        if(!tensor)
+        const armnn::BindingPointInfo& outputBinding = m_OutputBindings[outputIdx];
+        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, outputIndexArray[outputIdx]);
+        if(!IsValid(tensor))
         {
             return kTfLiteError;
         }
 
-        const armnn::Tensor outputTensor(outputBinding.second, TfLiteOpaqueTensorData(tensor));
-        outputTensors.emplace_back(outputIdx, outputTensor);
-
-        ++outputIndex;
+        const armnn::Tensor outputTensor(outputBinding.second, reinterpret_cast<TfLiteTensor*>(tensor)->data
+        .data);
+        outputTensors.emplace_back(outputIndexArray[outputIdx], outputTensor);
     }
 
     // Run graph
@@ -618,9 +622,14 @@ TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
 {
     switch (TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration))
     {
+        case kTfLiteBuiltinCast:
+            return VisitCastOperator(delegateData,
+                                     tfLiteContext,
+                                     tfLiteNode,
+                                     nodeIndex,
+                                     kTfLiteBuiltinCast);
         default:
             return kTfLiteError;
     }
 }
-
 } // armnnOpaqueDelegate namespace
\ No newline at end of file
-- 
cgit v1.2.1