From 34fa1bd7994af9abf52dbcc4aa808d0fa5f14aa3 Mon Sep 17 00:00:00 2001
From: Sadik Armagan
Date: Fri, 27 Nov 2020 12:40:52 +0000
Subject: IVGCVSW-5393 'TfLiteDelegate: Implement the split operators'

* Added SPLIT and SPLIT_V support to armnn_delegate

Signed-off-by: Sadik Armagan
Change-Id: I2def9b8be783b25ef17a997e521c6027553035d3
---
 delegate/CMakeLists.txt                   |   3 +
 delegate/TensorFlowLiteDelegateSupport.md |   4 +
 delegate/src/DelegateUtils.hpp            |  41 +++-
 delegate/src/Split.hpp                    | 331 +++++++++++++++++++++++++++
 delegate/src/armnn_delegate.cpp           |  13 ++
 delegate/src/test/SplitTest.cpp           | 262 +++++++++++++++++++++
 delegate/src/test/SplitTestHelper.hpp     | 368 ++++++++++++++++++++++++++++++
 delegate/src/test/TestUtils.hpp           |   7 +-
 8 files changed, 1016 insertions(+), 13 deletions(-)
 create mode 100644 delegate/src/Split.hpp
 create mode 100644 delegate/src/test/SplitTest.cpp
 create mode 100644 delegate/src/test/SplitTestHelper.hpp

diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index c04472e828..677a38ea4a 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -39,6 +39,7 @@ list(APPEND armnnDelegate_sources
         src/Slice.hpp
         src/Softmax.hpp
         src/SpaceDepth.hpp
+        src/Split.hpp
         src/Transpose.hpp)
 
 add_library(armnnDelegate SHARED ${armnnDelegate_sources})
@@ -132,6 +133,8 @@ if(BUILD_UNIT_TESTS)
         src/test/ResizeTestHelper.hpp
         src/test/SoftmaxTest.cpp
         src/test/SoftmaxTestHelper.hpp
+        src/test/SplitTest.cpp
+        src/test/SplitTestHelper.hpp
         src/test/TestUtils.hpp
         src/test/TestUtils.cpp
         src/test/TransposeTest.cpp
diff --git a/delegate/TensorFlowLiteDelegateSupport.md b/delegate/TensorFlowLiteDelegateSupport.md
index 00938b9be6..d94f14e485 100644
--- a/delegate/TensorFlowLiteDelegateSupport.md
+++ b/delegate/TensorFlowLiteDelegateSupport.md
@@ -74,6 +74,10 @@ The Arm NN SDK TensorFlow Lite delegate currently supports the following operat
 
 * SOFTMAX
 
+* SPLIT
+
+* SPLIT_V
+
 * SQRT
 
 * SUB
diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp
index fad07ff267..9f39a30727 100644
--- a/delegate/src/DelegateUtils.hpp
+++ b/delegate/src/DelegateUtils.hpp
@@ -98,31 +98,52 @@ TfLiteStatus ValidateNumOutputs(TfLiteContext* tfLiteContext,
     return kTfLiteOk;
 }
 
+bool IsDynamicTensor(const TfLiteTensor& tfLiteTensor)
+{
+    auto tensorAllocationType = tfLiteTensor.allocation_type;
+    if (tensorAllocationType == kTfLiteDynamic)
+    {
+        return true;
+    }
+    return false;
+}
+
 bool IsValid(const TfLiteTensor* tfLiteTensor)
 {
     return tfLiteTensor == nullptr ? false : true;
 }
 
-uint32_t NonNegative(int32_t value, int nodeIndex)
+bool IsValid(TfLiteContext* tfLiteContext, const TfLiteTensor& tfLiteTensor, int32_t operatorCode, int32_t nodeIndex)
 {
-    if (value < 0)
+    if(!IsValid(&tfLiteTensor))
     {
-        throw armnn::Exception("TfLiteArmnnDelegate: Non-negative value in node " + nodeIndex);
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Invalid TfLite tensor in operator #%d node #%d: ",
+            operatorCode, nodeIndex);
+        return false;
     }
-    else
+    if (IsDynamicTensor(tfLiteTensor))
     {
-        return static_cast<uint32_t>(value);
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic tensors are not supported in operator #%d node #%d: ",
+            operatorCode, nodeIndex);
+        return false;
     }
+    return true;
 }
 
-bool IsDynamicTensor(const TfLiteTensor& tfLiteTensor)
+uint32_t NonNegative(int32_t value, int nodeIndex)
 {
-    auto tensorAllocationType = tfLiteTensor.allocation_type;
-    if (tensorAllocationType == kTfLiteDynamic)
+    if (value < 0)
     {
-        return true;
+        throw armnn::Exception("TfLiteArmnnDelegate: Negative value in node " + std::to_string(nodeIndex));
+    }
+    else
+    {
+        return static_cast<uint32_t>(value);
     }
-    return false;
 }
 
 bool IsAffineQuantization(const TfLiteTensor& tfLiteTensor)
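The refactored `IsValid` overload folds the null check and the dynamic-tensor check into one call, so an operator visitor can reject an unusable tensor with a single guard. A minimal sketch of the calling pattern, matching how `Split.hpp` below uses it (the input index and operator code are illustrative):

    // Inside a visitor function: fetch a tensor and bail out so TfLite
    // falls back to its own kernel if the tensor is missing or dynamic.
    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
    if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
    {
        return kTfLiteError;
    }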
diff --git a/delegate/src/Split.hpp b/delegate/src/Split.hpp
new file mode 100644
index 0000000000..74da979666
--- /dev/null
+++ b/delegate/src/Split.hpp
@@ -0,0 +1,331 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DelegateUtils.hpp"
+
+#include <tensorflow/lite/builtin_ops.h>
+#include <tensorflow/lite/c/builtin_op_data.h>
+#include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/minimal_logging.h>
+
+namespace armnnDelegate
+{
+
+constexpr unsigned int MaxNumOfTensorDimensions = 5U;
+
+TfLiteStatus VisitSplitOperator(DelegateData& delegateData,
+                                TfLiteContext* tfLiteContext,
+                                TfLiteNode* tfLiteNode,
+                                int nodeIndex,
+                                int32_t tfLiteSplitOperatorCode)
+{
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex));
+
+    auto* splitParameters = reinterpret_cast<TfLiteSplitParams*>(tfLiteNode->builtin_data);
+    const unsigned int numSplits = NonNegative(splitParameters->num_splits, nodeIndex);
+
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, numSplits, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteAxisTensor, tfLiteSplitOperatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, tfLiteSplitOperatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& axisTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteAxisTensor);
+    const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+
+    ARMNN_ASSERT(axisTensorInfo.GetNumElements() == 1);
+    auto* axisTensorDataPtr = tflite::GetTensorData<int32_t>(&tfLiteAxisTensor);
+    std::vector<int32_t> axisTensorData(axisTensorDataPtr, axisTensorDataPtr + 1);
+    const unsigned int splitDim = axisTensorData[0];
+
+    std::vector<armnn::TensorInfo> outputs;
+    for (unsigned int i = 0; i < numSplits; ++i)
+    {
+        const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[i]];
+        if (!IsValid(tfLiteContext, tfLiteOutputTensor, tfLiteSplitOperatorCode, nodeIndex))
+        {
+            return kTfLiteError;
+        }
+        outputs.push_back(GetTensorInfoForTfLiteTensor(tfLiteOutputTensor));
+    }
+    const std::vector<std::reference_wrapper<armnn::TensorInfo>> outputTensorInfos(outputs.begin(), outputs.end());
+
+    auto inputDimSize = inputTensorInfo.GetNumDimensions();
+    if (inputDimSize > MaxNumOfTensorDimensions)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: The number of dimensions: #%d for input tensors of the split op cannot be greater "
+            "than #%d in node #%d: ", inputDimSize, MaxNumOfTensorDimensions, nodeIndex);
+        return kTfLiteError;
+    }
+
+    std::vector<unsigned int> splitterDimSizes(inputDimSize);
+
+    // Add current input shape to splitterDimSizes
+    for (unsigned int i = 0; i < inputDimSize; ++i)
+    {
+        splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
+    }
+
+    if (splitterDimSizes[splitDim] % numSplits != 0)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Number of splits #%d must evenly divide the dimension #%d in node #%d: ",
+            numSplits, splitterDimSizes[splitDim], nodeIndex);
+        return kTfLiteError;
+    }
+    splitterDimSizes[splitDim] /= numSplits;
+
+    armnn::SplitterDescriptor splitDescriptor(numSplits, inputDimSize);
+    for (unsigned int j = 0; j < numSplits; ++j)
+    {
+        // Set the size of the views.
+        for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
+        {
+            splitDescriptor.SetViewSize(j, dimIdx, splitterDimSizes[dimIdx]);
+        }
+        splitDescriptor.SetViewOriginCoord(j, splitDim, splitterDimSizes[splitDim] * j);
+    }
+
+    if (!delegateData.m_Network)
+    {
+        // Check if supported
+        bool isSupported = false;
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsSplitterSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfos,
+                                   splitDescriptor);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddSplitterLayer(splitDescriptor);
+    ARMNN_ASSERT(layer != nullptr);
+
+    for (unsigned int k = 0; k < layer->GetNumOutputSlots(); ++k)
+    {
+        layer->GetOutputSlot(k).SetTensorInfo(outputs[k]);
+    }
+
+    // Connect the input slots
+    delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(layer->GetInputSlot(0));
+
+    // Prepare output slots
+    for (unsigned int outputIndex = 0; outputIndex < layer->GetNumOutputSlots(); ++outputIndex)
+    {
+        armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex);
+        delegateData.m_OutputSlotForNode[
+            static_cast<unsigned long>(tfLiteNode->outputs->data[outputIndex])] = &outputSlot;
+    }
+
+    return kTfLiteOk;
+}
+
+TfLiteStatus VisitSplitVOperator(DelegateData& delegateData,
+                                 TfLiteContext* tfLiteContext,
+                                 TfLiteNode* tfLiteNode,
+                                 int nodeIndex,
+                                 int32_t tfLiteSplitVOperatorCode)
+{
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, tfLiteSplitVOperatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteSplitsTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+    if (!IsValid(tfLiteContext, tfLiteSplitsTensor, tfLiteSplitVOperatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+    if (!IsValid(tfLiteContext, tfLiteAxisTensor, tfLiteSplitVOperatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& splitsTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteSplitsTensor);
+    ARMNN_ASSERT(splitsTensorInfo.GetNumDimensions() == 1);
+
+    const armnn::TensorInfo& axisTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteAxisTensor);
+    ARMNN_ASSERT(axisTensorInfo.GetNumElements() == 1);
+    auto* axisTensorDataPtr = tflite::GetTensorData<int32_t>(&tfLiteAxisTensor);
+    std::vector<int32_t> axisTensorData(axisTensorDataPtr, axisTensorDataPtr + 1);
+
+    auto ComputeWrappedIndex = [](int index, unsigned int numDimensions)
+    {
+        int numDims = armnn::numeric_cast<int>(numDimensions);
+        int wrappedIndex = index < 0 ? numDims + index : index;
+        ARMNN_ASSERT(wrappedIndex >= 0);
+        ARMNN_ASSERT(wrappedIndex < numDims);
+
+        return static_cast<unsigned int>(wrappedIndex);
+    };
+
+    const unsigned int splitDim = ComputeWrappedIndex(axisTensorData[0],
+                                                      inputTensorInfo.GetNumDimensions());
+
+    auto* splitVParameters = reinterpret_cast<TfLiteSplitVParams*>(tfLiteNode->builtin_data);
+    unsigned int numSplits = 0;
+    if (splitVParameters)
+    {
+        numSplits = NonNegative(splitVParameters->num_splits, nodeIndex);
+    }
+    else
+    {
+        numSplits = splitsTensorInfo.GetNumElements();
+    }
+
+    if (numSplits == 0)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext, "TfLiteArmnnDelegate: Invalid number of splits %d in node #%d",
+            numSplits, nodeIndex);
+        return kTfLiteError;
+    }
+
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, numSplits, nodeIndex));
+    std::vector<armnn::TensorInfo> outputs;
+    for (unsigned int i = 0; i < numSplits; ++i)
+    {
+        const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[i]];
+        if (!IsValid(tfLiteContext, tfLiteOutputTensor, tfLiteSplitVOperatorCode, nodeIndex))
+        {
+            return kTfLiteError;
+        }
+        outputs.push_back(GetTensorInfoForTfLiteTensor(tfLiteOutputTensor));
+    }
+    const std::vector<std::reference_wrapper<armnn::TensorInfo>> outputTensorInfos(outputs.begin(), outputs.end());
+
+    auto inputDimSize = inputTensorInfo.GetNumDimensions();
+    if (inputDimSize > MaxNumOfTensorDimensions)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: The number of dimensions: #%d for input tensors of the split op cannot be greater "
+            "than #%d in node #%d: ", inputDimSize, MaxNumOfTensorDimensions, nodeIndex);
+        return kTfLiteError;
+    }
+
+    std::vector<int32_t> splitsTensorData(numSplits);
+#ifdef __STDC_LIB_EXT1__
+    ::memcpy_s(splitsTensorData.data(), sizeof(int32_t) * splitsTensorData.size(),
+               tfLiteSplitsTensor.data.data, splitsTensorInfo.GetNumBytes());
+#else
+    ::memcpy(splitsTensorData.data(), tfLiteSplitsTensor.data.data, splitsTensorInfo.GetNumBytes());
+#endif
+
+    unsigned int index         = 0;
+    unsigned int inferredIndex = 0;
+    int numberOfInferred       = 0;
+    int splitSum               = 0;
+
+    for (auto splitData : splitsTensorData)
+    {
+        if (splitData < 0)
+        {
+            ++numberOfInferred;
+            inferredIndex = index;
+        }
+        else
+        {
+            splitSum += splitData;
+        }
+        ++index;
+    }
+
+    // Check for inferred axis
+    if (numberOfInferred == 0)
+    {
+        if (splitSum != armnn::numeric_cast<int>(inputTensorInfo.GetShape()[splitDim]))
+        {
+            TF_LITE_MAYBE_KERNEL_LOG(
+                tfLiteContext, "TfLiteArmnnDelegate: SplitV split_sizes does not sum to the dimension of value along"
+                               " split_dim in node #%d", nodeIndex);
+            return kTfLiteError;
+        }
+    }
+    else if (numberOfInferred == 1)
+    {
+        splitsTensorData[inferredIndex] = armnn::numeric_cast<int>(inputTensorInfo.GetShape()[splitDim]) - splitSum;
+    }
+    else
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext, "TfLiteArmnnDelegate: SplitV cannot infer split size for more than one split in node #%d",
+            nodeIndex);
+        return kTfLiteError;
+    }
+
+    armnn::SplitterDescriptor splitDescriptor(numSplits, inputDimSize);
+    unsigned int accumSplit = 0;
+    for (unsigned int j = 0; j < numSplits; ++j)
+    {
+        unsigned int splitSize = armnn::numeric_cast<unsigned int>(splitsTensorData[j]);
+
+        // Set the size of the views.
+        for (unsigned int dimIdx = 0; dimIdx < inputTensorInfo.GetNumDimensions(); ++dimIdx)
+        {
+            unsigned int dimSize = inputTensorInfo.GetShape()[dimIdx];
+            if (dimIdx == splitDim)
+            {
+                dimSize = splitSize;
+            }
+            splitDescriptor.SetViewSize(j, dimIdx, dimSize);
+        }
+
+        splitDescriptor.SetViewOriginCoord(j, splitDim, accumSplit);
+        accumSplit += splitSize;
+    }
+
+    if (!delegateData.m_Network)
+    {
+        // Check if supported
+        bool isSupported = false;
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsSplitterSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfos,
+                                   splitDescriptor);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddSplitterLayer(splitDescriptor);
+    ARMNN_ASSERT(layer != nullptr);
+
+    for (unsigned int k = 0; k < layer->GetNumOutputSlots(); ++k)
+    {
+        layer->GetOutputSlot(k).SetTensorInfo(outputs[k]);
+    }
+
+    // Connect
+    return Connect(layer, tfLiteNode, delegateData);
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
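To make the view arithmetic in `VisitSplitOperator` concrete: splitting a [2, 2, 2, 2] input into two views along axis 3 (the case exercised by `SplitUint8Test` below) divides the axis-3 extent by `numSplits` and staggers the view origins along that axis. A standalone sketch using the same Arm NN descriptor calls as the code above, with the shapes hard-coded for illustration:

    #include <armnn/Descriptors.hpp>

    // Worked example: SPLIT of a [2,2,2,2] tensor into numSplits = 2 views on axis 3.
    const unsigned int numSplits = 2;
    const unsigned int splitDim  = 3;
    unsigned int splitterDimSizes[4] = { 2, 2, 2, 1 }; // input shape with axis 3 already divided by numSplits

    armnn::SplitterDescriptor splitDescriptor(numSplits, 4);
    for (unsigned int j = 0; j < numSplits; ++j)
    {
        // Both views have shape [2,2,2,1].
        for (unsigned int dimIdx = 0; dimIdx < 4; ++dimIdx)
        {
            splitDescriptor.SetViewSize(j, dimIdx, splitterDimSizes[dimIdx]);
        }
        // View j starts at offset j along the split axis: 0 and 1 here.
        splitDescriptor.SetViewOriginCoord(j, splitDim, splitterDimSizes[splitDim] * j);
    }

This is why the even-divisibility check above is needed: every SPLIT view gets the same size, so the axis extent must be a multiple of `numSplits`.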
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index d2b1796708..9097211241 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -27,6 +27,7 @@
 #include "Slice.hpp"
 #include "Softmax.hpp"
 #include "SpaceDepth.hpp"
+#include "Split.hpp"
 #include "Transpose.hpp"
 
 #include <flatbuffers/flatbuffers.h>
@@ -714,6 +715,18 @@ TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
                                                  tfLiteNode,
                                                  nodeIndex,
                                                  armnn::UnaryOperation::Rsqrt);
+        case kTfLiteBuiltinSplit:
+            return VisitSplitOperator(delegateData,
+                                      tfLiteContext,
+                                      tfLiteNode,
+                                      nodeIndex,
+                                      kTfLiteBuiltinSplit);
+        case kTfLiteBuiltinSplitV:
+            return VisitSplitVOperator(delegateData,
+                                       tfLiteContext,
+                                       tfLiteNode,
+                                       nodeIndex,
+                                       kTfLiteBuiltinSplitV);
         case kTfLiteBuiltinSqrt:
             return VisitElementwiseUnaryOperator(delegateData,
                                                  tfLiteContext,
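With the two new cases wired into `ArmnnSubgraph::VisitNode`, SPLIT and SPLIT_V nodes are claimed by the delegate when an application attaches it to an interpreter. A sketch of that application-side flow, mirroring what the test helpers below do; the `FlatBufferModel` loading step is an assumption here, only the delegate calls are taken from this patch:

    #include <armnn_delegate.hpp>
    #include <tensorflow/lite/interpreter.h>
    #include <tensorflow/lite/kernels/register.h>
    #include <tensorflow/lite/model.h>

    // Assumes `model` is a loaded tflite::FlatBufferModel containing a SPLIT node.
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::InterpreterBuilder(*model, tflite::ops::builtin::BuiltinOpResolver())(&interpreter);

    // Create the Arm NN delegate for a chosen backend and hand the graph over.
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
    armnnDelegate::DelegateOptions delegateOptions(backends);
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    interpreter->ModifyGraphWithDelegate(theArmnnDelegate.get());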
diff --git a/delegate/src/test/SplitTest.cpp b/delegate/src/test/SplitTest.cpp
new file mode 100644
index 0000000000..5940516583
--- /dev/null
+++ b/delegate/src/test/SplitTest.cpp
@@ -0,0 +1,262 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SplitTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// SPLIT Operator
+void SplitUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> inputShape { 2, 2, 2, 2 };
+    std::vector<int32_t> outputShape0 { 2, 2, 2, 1 };
+    std::vector<int32_t> outputShape1 { 2, 2, 2, 1 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 3 };  // Axis
+    std::vector<uint8_t> inputValues { 1, 2, 3, 4, 5, 6, 7, 8,
+                                       9, 10, 11, 12, 13, 14, 15, 16 }; // Input
+
+    std::vector<uint8_t> expectedOutputValues0 { 1, 3, 5, 7, 9, 11, 13, 15 };
+    std::vector<uint8_t> expectedOutputValues1 { 2, 4, 6, 8, 10, 12, 14, 16 };
+    std::vector<std::vector<uint8_t>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;
+
+    SplitTest<uint8_t>(::tflite::TensorType_UINT8,
+                       backends,
+                       axisShape,
+                       inputShape,
+                       outputShapes,
+                       axisData,
+                       inputValues,
+                       expectedOutputValues,
+                       numSplits);
+}
+
+void SplitFp32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> inputShape { 2, 2, 2, 2 };
+    std::vector<int32_t> outputShape0 { 2, 1, 2, 2 };
+    std::vector<int32_t> outputShape1 { 2, 1, 2, 2 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 1 };  // Axis
+    std::vector<float> inputValues { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                                     9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f }; // Input
+
+    std::vector<float> expectedOutputValues0 { 1.0f, 2.0f, 3.0f, 4.0f, 9.0f, 10.0f, 11.0f, 12.0f };
+    std::vector<float> expectedOutputValues1 { 5.0f, 6.0f, 7.0f, 8.0f, 13.0f, 14.0f, 15.0f, 16.0f };
+    std::vector<std::vector<float>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;
+
+    SplitTest<float>(::tflite::TensorType_FLOAT32,
+                     backends,
+                     axisShape,
+                     inputShape,
+                     outputShapes,
+                     axisData,
+                     inputValues,
+                     expectedOutputValues,
+                     numSplits);
+}
+
+// SPLIT Test Suite
+TEST_SUITE("SPLIT_CpuRefTests")
+{
+
+TEST_CASE ("SPLIT_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_CpuAccTests")
+{
+
+TEST_CASE ("SPLIT_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_GpuAccTests")
+{
+
+TEST_CASE ("SPLIT_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitFp32Test(backends);
+}
+
+}
+// End of SPLIT Test Suite
+
+// SPLIT_V Operator
+void SplitVUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> inputShape { 2, 4, 2, 2 };
+    std::vector<int32_t> splitsShape { 2 };
+    std::vector<int32_t> outputShape0 { 2, 3, 2, 2 };
+    std::vector<int32_t> outputShape1 { 2, 1, 2, 2 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 1 };      // Axis
+    std::vector<int32_t> splitsData { 3, 1 }; // Splits
+    std::vector<uint8_t> inputValues { 1, 2, 3, 4, 5, 6, 7, 8,
+                                       9, 10, 11, 12, 13, 14, 15, 16,
+                                       17, 18, 19, 20, 21, 22, 23, 24,
+                                       25, 26, 27, 28, 29, 30, 31, 32 }; // Input
+
+    std::vector<uint8_t> expectedOutputValues0 { 1, 2, 3, 4, 5, 6, 7, 8,
+                                                 9, 10, 11, 12, 17, 18, 19, 20,
+                                                 21, 22, 23, 24, 25, 26, 27, 28 };
+    std::vector<uint8_t> expectedOutputValues1 { 13, 14, 15, 16, 29, 30, 31, 32 };
+    std::vector<std::vector<uint8_t>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;
+
+    SplitVTest<uint8_t>(::tflite::TensorType_UINT8,
+                        backends,
+                        inputShape,
+                        splitsShape,
+                        axisShape,
+                        outputShapes,
+                        inputValues,
+                        splitsData,
+                        axisData,
+                        expectedOutputValues,
+                        numSplits);
+}
+
+void SplitVFp32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> inputShape { 2, 4, 2, 2 };
+    std::vector<int32_t> splitsShape { 2 };
+    std::vector<int32_t> outputShape0 { 2, 3, 2, 2 };
+    std::vector<int32_t> outputShape1 { 2, 1, 2, 2 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 1 };      // Axis
+    std::vector<int32_t> splitsData { 3, 1 }; // Splits
+    std::vector<float> inputValues { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                                     9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f,
+                                     17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f,
+                                     25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f }; // Input
+
+    std::vector<float> expectedOutputValues0 { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                                               9.0f, 10.0f, 11.0f, 12.0f, 17.0f, 18.0f, 19.0f, 20.0f,
+                                               21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
+    std::vector<float> expectedOutputValues1 { 13.0f, 14.0f, 15.0f, 16.0f, 29.0f, 30.0f, 31.0f, 32.0f };
+    std::vector<std::vector<float>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;
+
+    SplitVTest<float>(::tflite::TensorType_FLOAT32,
+                      backends,
+                      inputShape,
+                      splitsShape,
+                      axisShape,
+                      outputShapes,
+                      inputValues,
+                      splitsData,
+                      axisData,
+                      expectedOutputValues,
+                      numSplits);
+}
+
+// SPLIT_V Test Suite
+TEST_SUITE("SPLIT_V_CpuRefTests")
+{
+
+TEST_CASE ("SPLIT_V_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitVUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_V_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitVFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_V_CpuAccTests")
+{
+
+TEST_CASE ("SPLIT_V_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitVUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_V_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitVFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_V_GpuAccTests")
+{
+
+TEST_CASE ("SPLIT_V_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitVUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_V_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitVFp32Test(backends);
+}
+
+}
+// End of SPLIT_V Test Suite
+
+} // namespace armnnDelegate
\ No newline at end of file
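The SPLIT_V tests above pass explicit split sizes { 3, 1 }, but `VisitSplitVOperator` also accepts one inferred size: a single -1 entry is replaced by whatever remains of the split axis. A fragment-style sketch of that inference rule, lifted from the logic in `Split.hpp`, for the same [2, 4, 2, 2] input (axis 1, extent 4):

    #include <cstdint>
    #include <vector>

    // Sketch of the SPLIT_V size-inference step from VisitSplitVOperator.
    std::vector<int32_t> splitsTensorData { 3, -1 }; // one size left for inference
    int splitSum = 0;
    int numberOfInferred = 0;
    unsigned int inferredIndex = 0;
    for (unsigned int i = 0; i < splitsTensorData.size(); ++i)
    {
        if (splitsTensorData[i] < 0) { ++numberOfInferred; inferredIndex = i; }
        else                         { splitSum += splitsTensorData[i]; }
    }
    // Exactly one negative entry: infer it from the axis extent (4 here).
    if (numberOfInferred == 1)
    {
        splitsTensorData[inferredIndex] = 4 - splitSum; // -> { 3, 1 }
    }

More than one negative entry, or explicit sizes that do not sum to the axis extent, are rejected with `kTfLiteError`, as the checks in `VisitSplitVOperator` show.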
diff --git a/delegate/src/test/SplitTestHelper.hpp b/delegate/src/test/SplitTestHelper.hpp
new file mode 100644
index 0000000000..31fc7d5e46
--- /dev/null
+++ b/delegate/src/test/SplitTestHelper.hpp
@@ -0,0 +1,368 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+#include <string>
+
+namespace
+{
+
+std::vector<char> CreateSplitTfLiteModel(tflite::TensorType tensorType,
+                                         std::vector<int32_t>& axisTensorShape,
+                                         std::vector<int32_t>& inputTensorShape,
+                                         const std::vector<std::vector<int32_t>>& outputTensorShapes,
+                                         std::vector<int32_t>& axisData,
+                                         const int32_t numSplits,
+                                         float quantScale = 1.0f,
+                                         int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::array<flatbuffers::Offset<tflite::Buffer>, 2> buffers;
+    buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector<uint8_t>({}));
+    buffers[1] = CreateBuffer(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(axisData.data()),
+                                                             sizeof(int32_t) * axisData.size()));
+
+    auto quantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    std::array<flatbuffers::Offset<Tensor>, 4> tensors;
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(axisTensorShape.data(),
+                                                                      axisTensorShape.size()),
+                              ::tflite::TensorType_INT32,
+                              1,
+                              flatBufferBuilder.CreateString("axis"),
+                              quantizationParameters);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                      inputTensorShape.size()),
+                              tensorType,
+                              0,
+                              flatBufferBuilder.CreateString("input"),
+                              quantizationParameters);
+
+    // Create output tensors
+    for (unsigned int i = 0; i < outputTensorShapes.size(); ++i)
+    {
+        tensors[i + 2] = CreateTensor(flatBufferBuilder,
+                                      flatBufferBuilder.CreateVector<int32_t>(outputTensorShapes[i].data(),
+                                                                              outputTensorShapes[i].size()),
+                                      tensorType,
+                                      0,
+                                      flatBufferBuilder.CreateString("output"),
+                                      quantizationParameters);
+    }
+
+    // Create operator. SPLIT uses SplitOptions.
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_SplitOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions = CreateSplitOptions(flatBufferBuilder, numSplits).Union();
+
+    const std::vector<int> operatorInputs{ {0, 1} };
+    const std::vector<int> operatorOutputs{ {2, 3} };
+    flatbuffers::Offset<Operator> controlOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       operatorBuiltinOptionsType,
+                       operatorBuiltinOptions);
+
+    const std::vector<int> subgraphInputs{ {0, 1} };
+    const std::vector<int> subgraphOutputs{ {2, 3} };
+    flatbuffers::Offset<SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                       flatBufferBuilder.CreateVector(&controlOperator, 1));
+
+    flatbuffers::Offset<flatbuffers::String> modelDescription =
+        flatBufferBuilder.CreateString("ArmnnDelegate: SPLIT Operator Model");
+    flatbuffers::Offset<OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, BuiltinOperator_SPLIT);
+
+    flatbuffers::Offset<Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&operatorCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+template <typename T>
+void SplitTest(tflite::TensorType tensorType,
+               std::vector<armnn::BackendId>& backends,
+               std::vector<int32_t>& axisTensorShape,
+               std::vector<int32_t>& inputTensorShape,
+               std::vector<std::vector<int32_t>>& outputTensorShapes,
+               std::vector<int32_t>& axisData,
+               std::vector<T>& inputValues,
+               std::vector<std::vector<T>>& expectedOutputValues,
+               const int32_t numSplits,
+               float quantScale = 1.0f,
+               int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateSplitTfLiteModel(tensorType,
+                                                           axisTensorShape,
+                                                           inputTensorShape,
+                                                           outputTensorShapes,
+                                                           axisData,
+                                                           numSplits,
+                                                           quantScale,
+                                                           quantOffset);
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create TfLite Interpreters
+    std::unique_ptr<Interpreter> armnnDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&armnnDelegate) == kTfLiteOk);
+    CHECK(armnnDelegate != nullptr);
+    CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&tfLiteDelegate) == kTfLiteOk);
+    CHECK(tfLiteDelegate != nullptr);
+    CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                         armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data
+    armnnDelegate::FillInput(tfLiteDelegate, 1, inputValues);
+    armnnDelegate::FillInput(armnnDelegate, 1, inputValues);
+
+    // Run EnqueueWorkload
+    CHECK(tfLiteDelegate->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegate->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    for (unsigned int i = 0; i < expectedOutputValues.size(); ++i)
+    {
+        armnnDelegate::CompareOutputData(tfLiteDelegate,
+                                         armnnDelegate,
+                                         outputTensorShapes[i],
+                                         expectedOutputValues[i],
+                                         i);
+    }
+
+    tfLiteDelegate.reset(nullptr);
+    armnnDelegate.reset(nullptr);
+} // End of SPLIT Test
+
+std::vector<char> CreateSplitVTfLiteModel(tflite::TensorType tensorType,
+                                          std::vector<int32_t>& inputTensorShape,
+                                          std::vector<int32_t>& splitsTensorShape,
+                                          std::vector<int32_t>& axisTensorShape,
+                                          const std::vector<std::vector<int32_t>>& outputTensorShapes,
+                                          std::vector<int32_t>& splitsData,
+                                          std::vector<int32_t>& axisData,
+                                          const int32_t numSplits,
+                                          float quantScale = 1.0f,
+                                          int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::array<flatbuffers::Offset<tflite::Buffer>, 3> buffers;
+    buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector<uint8_t>({}));
+    buffers[1] = CreateBuffer(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(splitsData.data()),
+                                                             sizeof(int32_t) * splitsData.size()));
+    buffers[2] = CreateBuffer(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(axisData.data()),
+                                                             sizeof(int32_t) * axisData.size()));
+
+    auto quantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    std::array<flatbuffers::Offset<Tensor>, 5> tensors;
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                      inputTensorShape.size()),
+                              tensorType,
+                              0,
+                              flatBufferBuilder.CreateString("input"),
+                              quantizationParameters);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(splitsTensorShape.data(),
+                                                                      splitsTensorShape.size()),
+                              ::tflite::TensorType_INT32,
+                              1,
+                              flatBufferBuilder.CreateString("splits"),
+                              quantizationParameters);
+    tensors[2] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(axisTensorShape.data(),
+                                                                      axisTensorShape.size()),
+                              ::tflite::TensorType_INT32,
+                              2,
+                              flatBufferBuilder.CreateString("axis"),
+                              quantizationParameters);
+
+    // Create output tensors
+    for (unsigned int i = 0; i < outputTensorShapes.size(); ++i)
+    {
+        tensors[i + 3] = CreateTensor(flatBufferBuilder,
+                                      flatBufferBuilder.CreateVector<int32_t>(outputTensorShapes[i].data(),
+                                                                              outputTensorShapes[i].size()),
+                                      tensorType,
+                                      0,
+                                      flatBufferBuilder.CreateString("output"),
+                                      quantizationParameters);
+    }
+
+    // Create operator. SPLIT_V uses SplitVOptions.
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_SplitVOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions = CreateSplitVOptions(flatBufferBuilder, numSplits).Union();
+
+    const std::vector<int> operatorInputs{ {0, 1, 2} };
+    const std::vector<int> operatorOutputs{ {3, 4} };
+    flatbuffers::Offset<Operator> controlOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       operatorBuiltinOptionsType,
+                       operatorBuiltinOptions);
+
+    const std::vector<int> subgraphInputs{ {0, 1, 2} };
+    const std::vector<int> subgraphOutputs{ {3, 4} };
+    flatbuffers::Offset<SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                       flatBufferBuilder.CreateVector(&controlOperator, 1));
+
+    flatbuffers::Offset<flatbuffers::String> modelDescription =
+        flatBufferBuilder.CreateString("ArmnnDelegate: SPLIT_V Operator Model");
+    flatbuffers::Offset<OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, BuiltinOperator_SPLIT_V);
+
+    flatbuffers::Offset<Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&operatorCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+template <typename T>
+void SplitVTest(tflite::TensorType tensorType,
+                std::vector<armnn::BackendId>& backends,
+                std::vector<int32_t>& inputTensorShape,
+                std::vector<int32_t>& splitsTensorShape,
+                std::vector<int32_t>& axisTensorShape,
+                std::vector<std::vector<int32_t>>& outputTensorShapes,
+                std::vector<T>& inputValues,
+                std::vector<int32_t>& splitsData,
+                std::vector<int32_t>& axisData,
+                std::vector<std::vector<T>>& expectedOutputValues,
+                const int32_t numSplits,
+                float quantScale = 1.0f,
+                int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateSplitVTfLiteModel(tensorType,
+                                                            inputTensorShape,
+                                                            splitsTensorShape,
+                                                            axisTensorShape,
+                                                            outputTensorShapes,
+                                                            splitsData,
+                                                            axisData,
+                                                            numSplits,
+                                                            quantScale,
+                                                            quantOffset);
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create TfLite Interpreters
+    std::unique_ptr<Interpreter> armnnDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&armnnDelegate) == kTfLiteOk);
+    CHECK(armnnDelegate != nullptr);
+    CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&tfLiteDelegate) == kTfLiteOk);
+    CHECK(tfLiteDelegate != nullptr);
+    CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                         armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data
+    armnnDelegate::FillInput(tfLiteDelegate, 0, inputValues);
+    armnnDelegate::FillInput(armnnDelegate, 0, inputValues);
+
+    // Run EnqueueWorkload
+    CHECK(tfLiteDelegate->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegate->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    for (unsigned int i = 0; i < expectedOutputValues.size(); ++i)
+    {
+        armnnDelegate::CompareOutputData(tfLiteDelegate,
+                                         armnnDelegate,
+                                         outputTensorShapes[i],
+                                         expectedOutputValues[i],
+                                         i);
+    }
+
+    tfLiteDelegate.reset(nullptr);
+    armnnDelegate.reset(nullptr);
+} // End of SPLIT_V Test
+
+} // anonymous namespace
\ No newline at end of file
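The helpers above compare every output of the split, which is what the `TestUtils.hpp` change below enables: `CompareOutputData` gains an `outputIndex` parameter defaulting to 0, so existing single-output tests compile unchanged. The per-output comparison loop the split helpers rely on then reads (a fragment of the helpers above, shown in isolation):

    // Verify each split output against the reference interpreter's output.
    // `i` is forwarded as the new outputIndex argument.
    for (unsigned int i = 0; i < expectedOutputValues.size(); ++i)
    {
        armnnDelegate::CompareOutputData(tfLiteDelegate,
                                         armnnDelegate,
                                         outputTensorShapes[i],
                                         expectedOutputValues[i],
                                         i);
    }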
diff --git a/delegate/src/test/TestUtils.hpp b/delegate/src/test/TestUtils.hpp
index 57ae3ce6fe..284eaa74f5 100644
--- a/delegate/src/test/TestUtils.hpp
+++ b/delegate/src/test/TestUtils.hpp
@@ -51,12 +51,13 @@ template <typename T>
 void CompareOutputData(std::unique_ptr<tflite::Interpreter>& tfLiteInterpreter,
                        std::unique_ptr<tflite::Interpreter>& armnnDelegateInterpreter,
                        std::vector<int32_t>& expectedOutputShape,
-                       std::vector<T>& expectedOutputValues)
+                       std::vector<T>& expectedOutputValues,
+                       unsigned int outputIndex = 0)
 {
-    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
+    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
     auto tfLiteDelegateOutputTensor = tfLiteInterpreter->tensor(tfLiteDelegateOutputId);
     auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateOutputId);
-    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
+    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[outputIndex];
     auto armnnDelegateOutputTensor = armnnDelegateInterpreter->tensor(armnnDelegateOutputId);
     auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateOutputId);
-- 
cgit v1.2.1