From 32ca144fc8b4f0a1e2eda274da55ffd0a6016c02 Mon Sep 17 00:00:00 2001 From: Sadik Armagan Date: Fri, 13 Nov 2020 17:51:56 +0000 Subject: IVGCVSW-5338 TfLiteDelegate: Implement the Convolution operators * Add Convolution, DepthwiseConvolution and TransposeConvolution Signed-off-by: Kevin May Signed-off-by: Sadik Armagan Change-Id: I797e42844dfee0cc80beb64eabc3111b96320daf --- delegate/CMakeLists.txt | 3 + delegate/src/Convolution.hpp | 579 ++++++++++++++++++++++- delegate/src/DelegateUtils.hpp | 55 ++- delegate/src/Pooling.hpp | 25 +- delegate/src/test/Convolution2dTest.cpp | 428 +++++++++++++++++ delegate/src/test/ConvolutionTestHelper.hpp | 504 ++++++++++++++++++++ delegate/src/test/DepthwiseConvolution2dTest.cpp | 180 +++++++ 7 files changed, 1742 insertions(+), 32 deletions(-) create mode 100644 delegate/src/test/Convolution2dTest.cpp create mode 100644 delegate/src/test/ConvolutionTestHelper.hpp create mode 100644 delegate/src/test/DepthwiseConvolution2dTest.cpp diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt index 0dc72c2af6..0a3015aff1 100644 --- a/delegate/CMakeLists.txt +++ b/delegate/CMakeLists.txt @@ -93,6 +93,9 @@ if(BUILD_UNIT_TESTS) src/test/ArmnnDelegateTest.cpp src/test/ComparisonTest.cpp src/test/ComparisonTestHelper.hpp + src/test/Convolution2dTest.cpp + src/test/ConvolutionTestHelper.hpp + src/test/DepthwiseConvolution2dTest.cpp src/test/ElementwiseBinaryTest.cpp src/test/ElementwiseBinaryTestHelper.hpp src/test/ElementwiseUnaryTest.cpp diff --git a/delegate/src/Convolution.hpp b/delegate/src/Convolution.hpp index f16c9638c3..fed084e3a9 100644 --- a/delegate/src/Convolution.hpp +++ b/delegate/src/Convolution.hpp @@ -5,21 +5,598 @@ #pragma once +#include "DelegateUtils.hpp" + #include #include #include #include +#include "tensorflow/lite/kernels/internal/tensor.h" namespace armnnDelegate { +TfLiteStatus VisitConv2dOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + armnn::Convolution2dDescriptor descriptor; + const auto params = reinterpret_cast(tfLiteNode->builtin_data); + + bool biasEnabled = tfLiteNode->inputs->size > 2; + descriptor.m_BiasEnabled = biasEnabled; + descriptor.m_StrideX = NonNegative(params->stride_width, nodeIndex); + descriptor.m_StrideY = NonNegative(params->stride_height, nodeIndex); + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + descriptor.m_DilationX = NonNegative(params->dilation_width_factor, nodeIndex); + descriptor.m_DilationY = NonNegative(params->dilation_height_factor, nodeIndex); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteTensors[tfLiteNode->inputs->data[0]])) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + const 
TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid filter tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic filter tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor); + + armnn::TensorInfo biasTensorInfo; + if(biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(!IsValid(&tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid bias tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic bias tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor); + } + else + { + biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor)); + } + + armnn::Optional optionalBiasInfo(biasTensorInfo); + + // TfLite uses NHWC tensors + const unsigned int inputHeight = inputTensorInfo.GetShape()[1]; + const unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + + const unsigned int filterHeight = filterTensorInfo.GetShape()[1]; + const unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + + // Calculate padding + CalcPadding(inputHeight, filterHeight, descriptor.m_StrideY, descriptor.m_DilationY, + descriptor.m_PadTop, descriptor.m_PadBottom, params->padding); + CalcPadding(inputWidth, filterWidth, descriptor.m_StrideX, descriptor.m_DilationX, + descriptor.m_PadLeft, descriptor.m_PadRight, params->padding); + + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsConvolution2dSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor, + filterTensorInfo, + optionalBiasInfo); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = nullptr; + + // Set up filter and biases + auto filter = + CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[1]], + filterTensorInfo, + armnn::Optional()); + + if(biasEnabled) + { + auto biases = + CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[2]], + biasTensorInfo, + armnn::Optional()); + layer = delegateData.m_Network->AddConvolution2dLayer(descriptor, + filter, + armnn::Optional(biases)); + } + else + { + layer = delegateData.m_Network->AddConvolution2dLayer(descriptor, + filter, + armnn::EmptyOptional()); + } + + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + Connect(layer, tfLiteNode, delegateData); + + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + // Check activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData); + +} + +TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + bool biasEnabled = tfLiteNode->inputs->size > 2; + + armnn::DepthwiseConvolution2dDescriptor descriptor; + const auto params = reinterpret_cast(tfLiteNode->builtin_data); + + descriptor.m_BiasEnabled = biasEnabled; + descriptor.m_StrideX = NonNegative(params->stride_width, nodeIndex); + descriptor.m_StrideY = NonNegative(params->stride_height, nodeIndex); + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + descriptor.m_DilationX = NonNegative(params->dilation_width_factor, nodeIndex); + descriptor.m_DilationY = NonNegative(params->dilation_height_factor, nodeIndex); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + 
tfLiteContext, + "TfLiteArmnnDelegate: Invalid filter tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic filter tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Mappings from TensorflowLite filter tensors to the ArmNN filter tensors (ArmNN weights have to be [M, I, H, W]) + armnn::PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W] + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor, permutationVector); + + // Assuming input is NHWC + unsigned int inputHeight = inputTensorInfo.GetShape()[1]; + unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + + // TensorflowLite weights come in the format [1, H, W, I * M] + unsigned int filterHeight = filterTensorInfo.GetShape()[1]; + unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + + // Reshape weights as [ H, W, I, M ] + filterTensorInfo.SetShape({ filterHeight, + filterWidth, + inputTensorInfo.GetShape()[3], + filterTensorInfo.GetShape()[3] / inputTensorInfo.GetShape()[3] }); + + // Calculate padding + CalcPadding(inputHeight, filterHeight, descriptor.m_StrideY, descriptor.m_DilationY, + descriptor.m_PadTop, descriptor.m_PadBottom, params->padding); + CalcPadding(inputWidth, filterWidth, descriptor.m_StrideX, descriptor.m_DilationX, + descriptor.m_PadLeft, descriptor.m_PadRight, params->padding); + + armnn::TensorInfo biasTensorInfo; + if(biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(!IsValid(&tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid bias tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic bias tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor); + } + else + { + biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor)); + } + + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsDepthwiseConvolutionSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor, + filterTensorInfo, + armnn::Optional(biasTensorInfo)); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = nullptr; + std::vector swizzledData(filterTensorInfo.GetNumBytes()); + auto filter = + CreateConstTensor(&tfLiteFilterTensor, + filterTensorInfo, + armnn::Optional(permutationVector), + swizzledData.data()); + if(biasEnabled) + { + auto biases = + CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[2]], + biasTensorInfo, + armnn::Optional()); + layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor, + filter, + armnn::Optional(biases)); + } + else + { + layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor, + filter, + armnn::EmptyOptional()); + } + + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + Connect(layer, tfLiteNode, delegateData); + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + // Check activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData); +} + +TfLiteStatus VisitTransposeConv2dOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + armnn::TransposeConvolution2dDescriptor descriptor; + auto* parameters = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_BiasEnabled = false; + descriptor.m_StrideX = NonNegative(parameters->stride_width, nodeIndex); + descriptor.m_StrideY = NonNegative(parameters->stride_height, nodeIndex); + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteOutputShapeTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteOutputShapeTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputShapeTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + armnn::TensorInfo tensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputShapeTensor); + std::vector outputShape(tensorInfo.GetNumElements()); + if (tensorInfo.GetDataType() == armnn::DataType::Signed32) + { + for(unsigned int i=0; i < tensorInfo.GetNumElements(); i++) + { + outputShape[i] = ::tflite::GetTensorData(&tfLiteOutputShapeTensor)[i]; + } + } + + if (tensorInfo.GetDataType() == armnn::DataType::QAsymmU8) + { + for(unsigned int i=0; i < tensorInfo.GetNumElements(); i++) + { + outputShape[i] = ::tflite::GetTensorData(&tfLiteOutputShapeTensor)[i]; + } + } + // Change from signed to unsigned int to store in TransposeConvolution2dDescriptor. 
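+    // Note: the two branches above read the constant output-shape input either as int32 or as quantized
+    // uint8 data, and the loop below copies those values into the unsigned m_OutputShape field of the
+    // descriptor (the descriptor stores the requested output shape as unsigned ints).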
+ for (int dimension : outputShape) + { + descriptor.m_OutputShape.push_back(static_cast(dimension)); + } + descriptor.m_OutputShapeEnabled = true; + + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(!IsValid(&tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid filter tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic filter tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor); + + // TfLite uses NHWC tensors + const unsigned int inputHeight = inputTensorInfo.GetShape()[1]; + const unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + + const unsigned int filterHeight = filterTensorInfo.GetShape()[1]; + const unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + + // Calculate padding + CalcPadding(inputHeight, + filterHeight, + descriptor.m_StrideY, + 1, // dilation y + descriptor.m_PadTop, + descriptor.m_PadBottom, + parameters->padding); + CalcPadding(inputWidth, + filterWidth, + descriptor.m_StrideX, + 1, // dilation x + descriptor.m_PadLeft, + descriptor.m_PadRight, + parameters->padding); + + // Set up filter + auto filterTensor = CreateConstTensor(&tfLiteFilterTensor, + filterTensorInfo, + armnn::Optional()); + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsTransposeConvolution2dSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor, + filterTensorInfo, + armnn::EmptyOptional()); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddTransposeConvolution2dLayer(descriptor, + filterTensor, + armnn::EmptyOptional()); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + if (delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]] != nullptr) + { + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]]->Connect(layer->GetInputSlot(0)); + } + + // Prepare output slots + for (unsigned int outputIndex = 0; outputIndex < layer->GetNumOutputSlots(); ++outputIndex) + { + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex); + delegateData.m_OutputSlotForNode[tfLiteNode->outputs->data[outputIndex]] = &outputSlot; + } + return kTfLiteOk; +} + TfLiteStatus VisitConvolutionOperator(DelegateData& delegateData, TfLiteContext* tfLiteContext, TfLiteNode* tfLiteNode, int nodeIndex, int32_t operatorCode) { - return kTfLiteError; + switch(operatorCode) + { + case kTfLiteBuiltinConv2d: + return VisitConv2dOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); + case kTfLiteBuiltinDepthwiseConv2d: + return VisitDepthwiseConv2dOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); + case kTfLiteBuiltinTransposeConv: + return VisitTransposeConv2dOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); + default: + return kTfLiteError; + } } } // namespace armnnDelegate diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp index 71222276b4..dcad38503a 100644 --- a/delegate/src/DelegateUtils.hpp +++ b/delegate/src/DelegateUtils.hpp @@ -101,6 +101,18 @@ bool IsValid(const TfLiteTensor* tfLiteTensor) return tfLiteTensor == nullptr ? 
false : true;
 }
 
+uint32_t NonNegative(int32_t value, int nodeIndex)
+{
+    if (value < 0)
+    {
+        throw armnn::Exception("TfLiteArmnnDelegate: Non-negative value in node " + std::to_string(nodeIndex));
+    }
+    else
+    {
+        return static_cast<uint32_t>(value);
+    }
+}
+
 bool IsDynamicTensor(const TfLiteTensor& tfLiteTensor)
 {
     auto tensorAllocationType = tfLiteTensor.allocation_type;
@@ -339,7 +351,8 @@ armnn::DataType GetDataType(const TfLiteTensor& tfLiteTensor)
     }
 }
 
-armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor)
+armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor,
+                                               const armnn::PermutationVector& dimensionMappings = {0, 1, 2, 3})
 {
     armnn::DataType type = GetDataType(tfLiteTensor);
     armnn::TensorInfo ret;
@@ -379,7 +392,8 @@ armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor)
             quantizationScales.push_back(affineQuantization->scale->data[i]);
         }
         ret.SetQuantizationScales(quantizationScales);
-        ret.SetQuantizationDim(armnn::MakeOptional<unsigned int>(affineQuantization->quantized_dimension));
+        ret.SetQuantizationDim(dimensionMappings[armnn::numeric_cast<unsigned int>(
+            affineQuantization->quantized_dimension)]);
     }
     else
     {
@@ -399,23 +413,23 @@ armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor)
 
 armnn::ConstTensor CreateConstTensor(const TfLiteTensor* tfLiteTensor,
                                      armnn::TensorInfo& tensorInfo,
-                                     armnn::Optional<armnn::PermutationVector&> permutationVector)
+                                     armnn::Optional<armnn::PermutationVector&> permutationVector,
+                                     void* permutationData = nullptr)
 {
     if (tfLiteTensor->allocation_type != kTfLiteMmapRo)
     {
         throw armnn::Exception("TfLiteArmnnDelegate: Not constant allocation type: " + tfLiteTensor->allocation_type);
     }
 
-    if (permutationVector.has_value() && permutationVector.value().GetSize() > 0)
+    if (permutationVector.has_value() && permutationVector.value().GetSize() > 0 && permutationData != nullptr)
     {
-        std::vector<uint8_t> swizzledData;
-        swizzledData.resize(tensorInfo.GetNumBytes());
         armnnUtils::Permute(armnnUtils::Permuted(tensorInfo.GetShape(), permutationVector.value()),
                             permutationVector.value(),
                             tfLiteTensor->data.data,
-                            swizzledData.data(),
+                            permutationData,
                             armnn::GetDataTypeSize(tensorInfo.GetDataType()));
-        return armnn::ConstTensor(armnnUtils::Permuted(tensorInfo, permutationVector.value()), swizzledData.data());
+
+        return armnn::ConstTensor(armnnUtils::Permuted(tensorInfo, permutationVector.value()), permutationData);
     }
     else
     {
@@ -423,4 +437,29 @@ armnn::ConstTensor CreateConstTensor(const TfLiteTensor* tfLiteTensor, armnn::TensorInfo& tensorInfo,
     }
 }
 
+void CalcPadding(uint32_t inputSize,
+                 uint32_t filterSize,
+                 uint32_t stride,
+                 uint32_t dilation,
+                 uint32_t& paddingFront,
+                 uint32_t& paddingBack,
+                 TfLitePadding padding)
+{
+    paddingFront = 0;
+    paddingBack = 0;
+    if (padding == kTfLitePaddingSame)
+    {
+        uint32_t outputSize = (inputSize + stride - 1) / stride;
+        uint32_t dilatedSize = filterSize + (dilation - 1) * (filterSize - 1);
+        uint32_t temp = (outputSize - 1) * stride + dilatedSize;
+        if (temp > inputSize)
+        {
+            paddingFront = (temp - inputSize) / 2;
+            paddingBack = (temp - inputSize) - paddingFront;
+        }
+    }
+}
+
+
+
 } // namespace anonymous
diff --git a/delegate/src/Pooling.hpp b/delegate/src/Pooling.hpp
index b3a2af8900..07c1946d73 100644
--- a/delegate/src/Pooling.hpp
+++ b/delegate/src/Pooling.hpp
@@ -5,6 +5,8 @@
 
 #pragma once
 
+#include "DelegateUtils.hpp"
+
 #include
 #include
 #include
@@ -13,29 +15,6 @@
 namespace armnnDelegate
 {
 
-void CalcPadding(uint32_t inputSize,
-                 uint32_t filterSize,
-                 uint32_t stride,
-                 uint32_t dilation,
-                 uint32_t& paddingFront,
-                 uint32_t& paddingBack,
-                 
TfLitePadding padding) -{ - paddingFront = 0; - paddingBack = 0; - if (padding == kTfLitePaddingSame) - { - uint32_t outputSize = (inputSize + stride - 1) / stride; - uint32_t dilatedSize = filterSize + (dilation - 1) * (filterSize - 1); - uint32_t temp = (outputSize - 1) * stride + dilatedSize; - if (temp > inputSize) - { - paddingFront = (temp - inputSize) / 2; - paddingBack = (temp - inputSize) - paddingFront; - } - } -} - TfLiteStatus VisitPoolingOperator(DelegateData& delegateData, TfLiteContext* tfLiteContext, TfLiteNode* tfLiteNode, diff --git a/delegate/src/test/Convolution2dTest.cpp b/delegate/src/test/Convolution2dTest.cpp new file mode 100644 index 0000000000..4e9377a24d --- /dev/null +++ b/delegate/src/test/Convolution2dTest.cpp @@ -0,0 +1,428 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ConvolutionTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void Conv2DWithBiasesFp32Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 5, 5, 1 }; + std::vector filterShape { 1, 3, 3, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 3, 3, 1 }; + + static std::vector inputValues = + { + 1, 5, 2, 3, 5, + 8, 7, 3, 6, 3, + 3, 3, 9, 1, 9, + 4, 1, 8, 1, 3, + 6, 8, 1, 9, 2 + }; + + std::vector filterValues = + { + 4, 5, 6, + 0, 0, 0, + 3, 2, 1 + }; + + std::vector biasValues = { 0 }; + + std::vector expectedOutputValues = + { + 23, 33, 24, + 91, 99, 48, + 26, 50, 19 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_FLOAT32, + 2, // strideX + 2, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +void Conv2DWithBiasesUint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 1 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + static std::vector inputValues = { 1, 2, 3, 4 }; + + std::vector filterValues = { 2, 1, 0, 6 }; + + std::vector biasValues = { 10 }; + + std::vector expectedOutputValues = + { + (1 * 2 + 2 * 1 + 3 * 0 + 4 * 6 + 10) / 2, // 19 + (2 * 2 + 0 * 1 + 4 * 0 + 0 * 6 + 10) / 2, // 7 + (3 * 2 + 4 * 1 + 0 * 0 + 0 * 6 + 10) / 2, // 10 + (4 * 2 + 0 * 1 + 0 * 0 + 0 * 6 + 10) / 2, // 9 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +void Conv2DWithBiasesReluUint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 1 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + static std::vector inputValues = { 1, 2, 4, 8 }; + + std::vector filterValues = { 2, 1, 0, 6 }; + + std::vector biasValues = { 16 }; + + // factors to consider: + // - the filter zero point is non zero, hence the (x-fz) + // - the output scale is 2 hence the /2 + // - output zero point is non zero, 
hence the +outZero + // - RELU cuts negative values and then we add the output zero point + uint8_t bias = 16; + uint8_t outZero = 20; + uint8_t fz = 4; // filter zero point + + std::vector expectedOutputValues = + { + std::max(outZero, static_cast((1*(2-fz) + 2*(1-fz) + 4*(0-fz) + 8*(6-fz) + bias)/2 + outZero)), + std::max(outZero, static_cast((2*(2-fz) + 0*(1-fz) + 8*(0-fz) + 0*(6-fz) + bias)/2 + outZero)), + std::max(outZero, static_cast((4*(2-fz) + 8*(1-fz) + 0*(0-fz) + 0*(6-fz) + bias)/2 + outZero)), + std::max(outZero, static_cast((8*(2-fz) + 0*(1-fz) + 0*(0-fz) + 0*(6-fz) + bias)/2 + outZero)) + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_RELU, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues, + 1, // filter scale + 4, // filter offset + 2, // output scale + 20); // output offset +} + +void Conv2DWithBiasesRelu6Uint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 1 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + static std::vector inputValues = { 1, 2, 4, 1 }; + + std::vector filterValues = { 2, 1, 0, 6 }; + + std::vector biasValues = { 0 }; + + // factors to consider: + // - the output scale is 2 hence the /2 + // - RELU6 cuts output values at +6 + uint8_t relu6Min = 6 / 2; // divide by output scale + + std::vector expectedOutputValues = + { + std::min(relu6Min, static_cast((1 * 2 + 2 * 1 + 4 * 0 + 1 * 6) / 2)), + std::min(relu6Min, static_cast((2 * 2 + 0 * 1 + 1 * 0 + 0 * 6) / 2)), + std::min(relu6Min, static_cast((4 * 2 + 1 * 1 + 0 * 0 + 0 * 6) / 2)), + std::min(relu6Min, static_cast((1 * 2 + 0 * 1 + 0 * 0 + 0 * 6) / 2)) + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_RELU6, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +TEST_SUITE("Convolution2dTest_CpuRef") +{ + +TEST_CASE ("Conv2DWithBiases_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv2DWithBiasesFp32Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv2DWithBiasesUint8Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Relu_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv2DWithBiasesReluUint8Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Relu6_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv2DWithBiasesRelu6Uint8Test(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_CpuRef") + +TEST_SUITE("Convolution2dTest_CpuAcc") +{ + +TEST_CASE ("Conv2DWithBiases_Fp32_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +Conv2DWithBiasesFp32Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Uint8_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +Conv2DWithBiasesUint8Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Relu_Uint8_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; 
+Conv2DWithBiasesReluUint8Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Relu6Uint8_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +Conv2DWithBiasesRelu6Uint8Test(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_CpuAcc") + +TEST_SUITE("Convolution2dTest_GpuAcc") +{ + +TEST_CASE ("Conv2DWithBiases_Fp32_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +Conv2DWithBiasesFp32Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Uint8_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +Conv2DWithBiasesUint8Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Relu_Uint8_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +Conv2DWithBiasesReluUint8Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Relu_Uint8_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +Conv2DWithBiasesRelu6Uint8Test(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_GpuAcc") + +void TransposeConvUint8Test(std::vector& backends) +{ + // Set input data + std::vector transposeTensorShape { 4 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector inputShape { 1, 2, 2, 1 }; + std::vector outputShape { 1, 3, 3, 1 }; + + std::vector transposeData = { 1, 3, 3, 1 }; + static std::vector inputValues = { 1, 2, 3, 4 }; + std::vector filterValues = { 0, 1, 2, 4 }; + std::vector expectedOutputValues = + { + 0, 1, 2, + 2, 11, 12, + 6, 20, 16 + }; + + tflite::Padding padding = tflite::Padding_VALID; + TransposeConvTest(backends, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + padding, + transposeTensorShape, + filterShape, + inputShape, + outputShape, + transposeData, + filterValues, + inputValues, + expectedOutputValues); +} + +void TransposeConvFp32Test(std::vector& backends) +{ + std::vector transposeTensorShape { 4 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector inputShape { 1, 2, 2, 1 }; + std::vector outputShape { 1, 3, 3, 1 }; + + std::vector transposeData = { 1, 3, 3, 1 }; + static std::vector inputValues = { 1, 2, 3, 4 }; + std::vector filterValues = { 0, 1, 2, 4 }; + std::vector expectedOutputValues = + { + 0, 1, 2, + 2, 11, 12, + 6, 20, 16 + }; + + tflite::Padding padding = tflite::Padding_VALID; + TransposeConvTest(backends, + ::tflite::TensorType_FLOAT32, + 1, // strideX + 1, // strideY + padding, + transposeTensorShape, + filterShape, + inputShape, + outputShape, + transposeData, + filterValues, + inputValues, + expectedOutputValues); +} + +TEST_SUITE("TransposeConv_CpuRef_Test") +{ + +TEST_CASE ("TransposeConv_Fp32_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + TransposeConvFp32Test(backends); +} + +TEST_CASE ("TransposeConv_Uint8_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + TransposeConvUint8Test(backends); +} + +} // End of TEST_SUITE(TransposeConv_CpuRef_Test) + +TEST_SUITE("TransposeConv_CpuAcc_Test") +{ + +TEST_CASE ("TransposeConv_Fp32_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +TransposeConvFp32Test(backends); +} + +TEST_CASE ("TransposeConv_Uint8_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +TransposeConvUint8Test(backends); +} + +} // End of TEST_SUITE(TransposeConv_CpuAcc_Test) + +TEST_SUITE("TransposeConv_GpuAcc_Test") +{ + +TEST_CASE ("TransposeConv_Fp32_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +TransposeConvFp32Test(backends); +} + +TEST_CASE ("TransposeConv_Uint8_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +TransposeConvUint8Test(backends); +} + +} // End of 
TEST_SUITE(TransposeConv_GpuAcc_Test) + +} // namespace armnnDelegate \ No newline at end of file diff --git a/delegate/src/test/ConvolutionTestHelper.hpp b/delegate/src/test/ConvolutionTestHelper.hpp new file mode 100644 index 0000000000..b7705cc904 --- /dev/null +++ b/delegate/src/test/ConvolutionTestHelper.hpp @@ -0,0 +1,504 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateConv2dTfLiteModel(tflite::BuiltinOperator convolutionOperatorCode, + tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + uint32_t dilationX, + uint32_t dilationY, + tflite::Padding padding, + tflite::ActivationFunctionType fused_activation_function, + const std::vector & inputTensorShape, + const std::vector & filterTensorShape, + const std::vector & biasTensorShape, + const std::vector & outputTensorShape, + const std::vector & filterData, + const std::vector & biasData, + float filterScale = 1.0f, + int filterOffset = 0, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0, + int32_t depth_multiplier = 1) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(filterData.data()), + sizeof(T) * filterData.size())); + + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), + sizeof(B) * biasData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + auto outputQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + auto filterQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ filterScale }), + flatBufferBuilder.CreateVector({ filterOffset })); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(filterTensorShape.data(), + filterTensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("filter"), + filterQuantizationParameters); + + auto biasTensorType = ::tflite::TensorType_FLOAT32; + if (tensorType == ::tflite::TensorType_UINT8) + { + biasTensorType = ::tflite::TensorType_INT32; + } + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(biasTensorShape.data(), biasTensorShape.size()), + biasTensorType, + 2, + flatBufferBuilder.CreateString("bias"), + quantizationParameters); + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters); + + flatbuffers::Offset operatorBuiltinOptions; + tflite::BuiltinOptions 
operatorBuiltinOptionsType; + + if(convolutionOperatorCode == tflite::BuiltinOperator_DEPTHWISE_CONV_2D) + { + operatorBuiltinOptionsType = tflite::BuiltinOptions_DepthwiseConv2DOptions; + operatorBuiltinOptions = CreateDepthwiseConv2DOptions(flatBufferBuilder, + padding, + strideX, + strideY, + depth_multiplier, + fused_activation_function, + dilationX, + dilationY).Union(); + } + if(convolutionOperatorCode == tflite::BuiltinOperator_CONV_2D) + { + operatorBuiltinOptionsType = tflite::BuiltinOptions_Conv2DOptions; + operatorBuiltinOptions = CreateConv2DOptions(flatBufferBuilder, + padding, + strideX, + strideY, + fused_activation_function, + dilationX, + dilationY).Union(); + } + + // create operator + const std::vector operatorInputs{{0, 1, 2}}; + const std::vector operatorOutputs{{3}}; + flatbuffers::Offset convolutionOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1, 2} }; + const std::vector subgraphOutputs{{3}}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&convolutionOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Convolution2d Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, convolutionOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ConvolutionTest(tflite::BuiltinOperator convolutionOperatorCode, + tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + uint32_t dilationX, + uint32_t dilationY, + tflite::Padding padding, + tflite::ActivationFunctionType fused_activation_function, + std::vector& backends, + std::vector& inputShape, + std::vector& filterShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& filterValues, + std::vector& expectedOutputValues, + const std::vector& biasShape = {}, + const std::vector& biasValues = {}, + float filterScale = 1.0f, + int filterOffset = 0, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0, + int32_t depth_multiplier = 1) + +{ + using namespace tflite; + + std::vector modelBuffer; + modelBuffer = CreateConv2dTfLiteModel(convolutionOperatorCode, + tensorType, + strideX, + strideY, + dilationX, + dilationY, + padding, + fused_activation_function, + inputShape, + filterShape, + biasShape, + outputShape, + filterValues, + biasValues, + filterScale, + filterOffset, + outputQuantScale, + outputQuantOffset, + quantScale, + quantOffset, + depth_multiplier); + + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + 
std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelagateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(tfLiteDelagateOutputData[i] == armnnDelegateOutputData[i]); + CHECK(doctest::Approx(tfLiteDelagateOutputData[i]).epsilon(0.000001f) == expectedOutputValues[i]); + CHECK(doctest::Approx(armnnDelegateOutputData[i]).epsilon(0.000001f) == expectedOutputValues[i]); + } +} + +template +std::vector CreateTransposeConvTfLiteModel(tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + tflite::Padding padding, + const std::vector & transposeTensorShape, + const std::vector & filterTensorShape, + const std::vector & inputTensorShape, + const std::vector & outputTensorShape, + const std::vector & transposeData, + const std::vector & filterData, + float filterScale = 1.0f, + int filterOffset = 0, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(transposeData.data()), + sizeof(int32_t) * transposeData.size())); + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(filterData.data()), + sizeof(T) * filterData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ 
quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + auto outputQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + auto filterQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ filterScale }), + flatBufferBuilder.CreateVector({ filterOffset })); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(transposeTensorShape.data(), + transposeTensorShape.size()), + tflite::TensorType_INT32, + 1); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(filterTensorShape.data(), + filterTensorShape.size()), + tensorType, + 2, + flatBufferBuilder.CreateString("filter"), + filterQuantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters); + + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_TransposeConvOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreateTransposeConvOptions(flatBufferBuilder, padding, strideX, strideY).Union(); + + // create operator + const std::vector operatorInputs{{0, 1, 2}}; + const std::vector operatorOutputs{{3}}; + flatbuffers::Offset convolutionOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1, 2} }; + const std::vector subgraphOutputs{{3}}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&convolutionOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: TransposeConv Operator Model"); + flatbuffers::Offset operatorCode = + CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_TRANSPOSE_CONV); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void TransposeConvTest(std::vector& backends, + tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + tflite::Padding padding, + const std::vector & transposeTensorShape, + const std::vector & filterTensorShape, + const std::vector & inputTensorShape, + const std::vector & outputTensorShape, + const std::vector & transposeData, + const 
std::vector & filterData, + std::vector& inputValues, + std::vector& expectedOutputValues, + float filterScale = 1.0f, + int filterOffset = 0, + float outputQuantScale = 1.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + + std::vector modelBuffer; + modelBuffer = CreateTransposeConvTfLiteModel(tensorType, + strideX, + strideY, + padding, + transposeTensorShape, + filterTensorShape, + inputTensorShape, + outputTensorShape, + transposeData, + filterData, + filterScale, + filterOffset, + outputQuantScale, + outputQuantOffset, + quantScale, + quantOffset); + + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[2]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[2]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelagateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(armnnDelegateOutputData[i] == expectedOutputValues[i]); + CHECK(tfLiteDelagateOutputData[i] == expectedOutputValues[i]); + CHECK(tfLiteDelagateOutputData[i] == armnnDelegateOutputData[i]); + } +} + +} // anonymous namespace + + + + diff --git a/delegate/src/test/DepthwiseConvolution2dTest.cpp b/delegate/src/test/DepthwiseConvolution2dTest.cpp new file mode 100644 index 0000000000..6ca456982b --- /dev/null +++ b/delegate/src/test/DepthwiseConvolution2dTest.cpp @@ -0,0 +1,180 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ConvolutionTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void DepthwiseConv2dValidReluFp32Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 2, 2 }; + std::vector filterShape { 1, 2, 2, 4 }; + std::vector biasShape { 4 }; + std::vector outputShape { 1, 3, 3, 1 }; + + static std::vector inputValues = + { + 1, 2, 7, 8, + 3, 4, 9, 10, + 5, 6, 11, 12 + }; + + std::vector filterValues = + { + 1, 2, 3, 4, + -9, 10, -11, 12, + 5, 6, 7, 8, + 13, -14, 15, -16 + }; + + std::vector biasValues = { 1, 2, 3, 4 }; + + std::vector expectedOutputValues = + { + 71, 0, 99, 0, + 91, 0, 127, 0 + }; + + tflite::Padding padding = tflite::Padding_VALID; + int32_t depth_multiplier = 2; + + ConvolutionTest(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + ::tflite::TensorType_FLOAT32, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_RELU, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues, + 1.0f, // filterScale + 0, // filterOffset + 2.0f, // outputQuantScale + 0, // outputQuantOffset + 1.0f, // quantScale + 0, // quantOffset + depth_multiplier); +} + +void DepthwiseConv2dSameUint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 3, 1 }; + std::vector filterShape { 1, 3, 3, 1 }; + std::vector biasShape { 1 } ; + std::vector outputShape { 1, 3, 3, 1 }; + + static std::vector inputValues = + { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8 + }; + + std::vector filterValues = { 9, 8, 7, 6, 5, 4, 3, 2, 1 }; + + std::vector biasValues = { 10 }; + + std::vector expectedOutputValues = + { + 12, 23, 24, // ( 14+10)/2, ( 35+10)/2, ( 38+10)/2, + 34, 65, 61, // ( 57+10)/2, (120+10)/2, (111+10)/2, + 60, 104, 84 // (110+10)/2, (197+10)/2, (158+10)/2 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +TEST_SUITE("DepthwiseConv2d_CpuRef_Tests") +{ + +TEST_CASE ("DepthwiseConv2d_Valid_Relu_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + DepthwiseConv2dValidReluFp32Test(backends); +} + +TEST_CASE ("DepthwiseConv2d_Same_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + DepthwiseConv2dSameUint8Test(backends); +} + +}//End of TEST_SUITE("DepthwiseConv2d_CpuRef_Tests") + +TEST_SUITE("DepthwiseConv2d_CpuAcc_Tests") +{ + +TEST_CASE ("DepthwiseConv2d_Valid_Relu_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + DepthwiseConv2dValidReluFp32Test(backends); +} + +TEST_CASE ("DepthwiseConv2d_Same_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + DepthwiseConv2dSameUint8Test(backends); +} + +}//End of TEST_SUITE("DepthwiseConv2d_CpuAcc_Tests") + +TEST_SUITE("DepthwiseConv2d_GpuAcc_Tests") +{ + +TEST_CASE ("DepthwiseConv2d_Valid_Relu_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + DepthwiseConv2dValidReluFp32Test(backends); +} + +TEST_CASE ("DepthwiseConv2d_Same_Uint8_GpuAcc_Test") +{ + std::vector backends = 
{armnn::Compute::GpuAcc};
+    DepthwiseConv2dSameUint8Test(backends);
+}
+
+}//End of TEST_SUITE("DepthwiseConv2d_GpuAcc_Tests")
+
+} // namespace armnnDelegate
\ No newline at end of file
-- 
cgit v1.2.1
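
As a worked reference for the padding logic above: the sketch below reproduces, under stated assumptions, the SAME-padding arithmetic that the new CalcPadding() helper in DelegateUtils.hpp implements. The function name CalcSamePadding and the plain bool flag are stand-ins introduced here so the snippet builds without the TfLite headers; they are not part of the patch. The values in main() mirror the 5x5 input, 3x3 filter, stride-2 SAME case exercised by Conv2DWithBiasesFp32Test, which needs one element of padding on each side to produce its 3x3 output.

#include <cstdint>
#include <iostream>

// Same arithmetic as CalcPadding() in DelegateUtils.hpp, with a bool
// standing in for TfLitePadding (true == SAME, false == VALID).
void CalcSamePadding(std::uint32_t inputSize,
                     std::uint32_t filterSize,
                     std::uint32_t stride,
                     std::uint32_t dilation,
                     std::uint32_t& paddingFront,
                     std::uint32_t& paddingBack,
                     bool samePadding)
{
    paddingFront = 0;
    paddingBack  = 0;
    if (samePadding)
    {
        // SAME keeps ceil(inputSize / stride) output positions.
        std::uint32_t outputSize  = (inputSize + stride - 1) / stride;
        // Effective filter extent once dilation is applied.
        std::uint32_t dilatedSize = filterSize + (dilation - 1) * (filterSize - 1);
        // Input extent those output positions would need to cover.
        std::uint32_t temp        = (outputSize - 1) * stride + dilatedSize;
        if (temp > inputSize)
        {
            paddingFront = (temp - inputSize) / 2;            // rounds down
            paddingBack  = (temp - inputSize) - paddingFront; // takes the remainder
        }
    }
}

int main()
{
    // Mirrors Conv2DWithBiasesFp32Test: 5x5 input, 3x3 filter, stride 2 -> 3x3 output.
    std::uint32_t padFront = 0;
    std::uint32_t padBack  = 0;
    CalcSamePadding(5, 3, 2, 1, padFront, padBack, true);
    std::cout << "padFront=" << padFront << " padBack=" << padBack << "\n"; // prints padFront=1 padBack=1
    return 0;
}

With VALID padding (as in the TransposeConv tests above) both values simply stay at zero.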