From fbd2817039db8f856d75cf9d3d3980baeaa554c9 Mon Sep 17 00:00:00 2001 From: Teresa Charlin Date: Thu, 7 Jul 2022 14:24:59 +0100 Subject: Revert "Revert "IVGCVSW-6650 Refactor ExecuteNetwork"" This reverts commit 1a7f033768acb27da11503bd29abb468d2e77f9e. List of fixes to be able to add this code again: * "emplacing_back" the vector inputTensors into the vector m_InputTensorsVec outside the for loop * GetIOInfo() uses IOptimizedNetwork instead of INetwork, where the infered shapes are not saved * Add missing data type Signed32 to SetupInputsAndOutputs() * PrintOutputTensors() prints the actual output without dequantizing * Add profilingDetailsMethod as input in networkProperties in ArmNNExecutor constructor * Fix typos Change-Id: I91de166f87228282db3efa27431fe91458834442 Signed-off-by: Teresa Charlin Change-Id: Ic6634d48892d11e5f146cdf285e1e333e93e9937 Signed-off-by: Francis Murtagh --- include/armnn/INetwork.hpp | 4 +- include/armnn/TypesUtils.hpp | 24 +- src/armnn/Network.cpp | 13 + src/armnn/OptimizedNetworkImpl.hpp | 3 + tests/CMakeLists.txt | 10 + tests/ExecuteNetwork/ArmNNExecutor.cpp | 805 +++++++++++++++ tests/ExecuteNetwork/ArmNNExecutor.hpp | 161 +++ tests/ExecuteNetwork/ExecuteNetwork.cpp | 1076 +------------------- tests/ExecuteNetwork/ExecuteNetworkParams.cpp | 134 +-- tests/ExecuteNetwork/ExecuteNetworkParams.hpp | 90 +- .../ExecuteNetworkProgramOptions.cpp | 165 +-- tests/ExecuteNetwork/IExecutor.hpp | 22 + tests/ExecuteNetwork/TfliteExecutor.cpp | 251 +++++ tests/ExecuteNetwork/TfliteExecutor.hpp | 35 + tests/InferenceModel.hpp | 37 +- .../NetworkExecutionUtils.cpp | 309 +----- .../NetworkExecutionUtils.hpp | 279 ++++- 17 files changed, 1774 insertions(+), 1644 deletions(-) create mode 100644 tests/ExecuteNetwork/ArmNNExecutor.cpp create mode 100644 tests/ExecuteNetwork/ArmNNExecutor.hpp create mode 100644 tests/ExecuteNetwork/IExecutor.hpp create mode 100644 tests/ExecuteNetwork/TfliteExecutor.cpp create mode 100644 tests/ExecuteNetwork/TfliteExecutor.hpp diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 349c7e87b5..ecc888d9c1 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -801,8 +801,10 @@ public: size_t GetNumInputs() const; size_t GetNumOutputs() const; + void ExecuteStrategy(IStrategy& strategy) const; + // Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized, - // the provided ModelOptions will only be used when creating a LoadedNetwork. + // the provided ModelOptions will only be used when creating a LoadedNetwork. 
IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions); IOptimizedNetwork(std::unique_ptr graph); IOptimizedNetwork(std::unique_ptr impl); diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp index ccb0280457..ca7e7c58ac 100644 --- a/include/armnn/TypesUtils.hpp +++ b/include/armnn/TypesUtils.hpp @@ -301,16 +301,30 @@ inline std::ostream& operator<<(std::ostream& os, Status stat) } -inline std::ostream & operator<<(std::ostream & os, const armnn::TensorShape & shape) +inline std::ostream& operator<<(std::ostream& os, const armnn::TensorShape& shape) { os << "["; - for (uint32_t i=0; iExecuteStrategy(strategy); +} + +void OptimizedNetworkImpl::ExecuteStrategy(IStrategy &strategy) const +{ + for (auto layer : GetGraph()) + { + layer->ExecuteStrategy(strategy); + }; +} + } // namespace armnn diff --git a/src/armnn/OptimizedNetworkImpl.hpp b/src/armnn/OptimizedNetworkImpl.hpp index cb0dc4c8cf..45809d5619 100644 --- a/src/armnn/OptimizedNetworkImpl.hpp +++ b/src/armnn/OptimizedNetworkImpl.hpp @@ -25,8 +25,11 @@ public: virtual size_t GetNumOutputs() const; Graph& GetGraph() { return *m_Graph; } + Graph& GetGraph() const { return *m_Graph; } ModelOptions& GetModelOptions() { return m_ModelOptions; } + void ExecuteStrategy(IStrategy& strategy) const; + private: std::unique_ptr m_Graph; arm::pipe::ProfilingGuid m_Guid; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9f377c6466..9ac9bcb636 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -144,6 +144,9 @@ if (BUILD_ARMNN_SERIALIZER OR BUILD_ONNX_PARSER OR BUILD_ARMNN_TFLITE_DELEGATE) set(ExecuteNetwork_sources + ExecuteNetwork/IExecutor.hpp + ExecuteNetwork/ArmNNExecutor.cpp + ExecuteNetwork/ArmNNExecutor.hpp ExecuteNetwork/ExecuteNetwork.cpp ExecuteNetwork/ExecuteNetworkProgramOptions.cpp ExecuteNetwork/ExecuteNetworkProgramOptions.hpp @@ -152,6 +155,13 @@ if (BUILD_ARMNN_SERIALIZER NetworkExecutionUtils/NetworkExecutionUtils.cpp NetworkExecutionUtils/NetworkExecutionUtils.hpp) + if(BUILD_ARMNN_TFLITE_DELEGATE) + set(ExecuteNetwork_sources + ${ExecuteNetwork_sources} + ExecuteNetwork/TfliteExecutor.cpp + ExecuteNetwork/TfliteExecutor.hpp) + endif() + add_executable_ex(ExecuteNetwork ${ExecuteNetwork_sources}) target_include_directories(ExecuteNetwork PRIVATE ../src/armnn) target_include_directories(ExecuteNetwork PRIVATE ../src/armnnUtils) diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp new file mode 100644 index 0000000000..5be3383061 --- /dev/null +++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp @@ -0,0 +1,805 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + + +#include "ArmNNExecutor.hpp" +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" + +#include +#include + + +using namespace armnn; +using namespace std::chrono; + +ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions) +: m_Params(params) +{ + runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling; + runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath; + m_Runtime = armnn::IRuntime::Create(runtimeOptions); + + auto parser = CreateParser(); + auto network = parser->CreateNetwork(m_Params); + auto optNet = OptimizeNetwork(network.get()); + + m_IOInfo = GetIOInfo(optNet.get()); + SetupInputsAndOutputs(); + + std::string errorMsg; + + armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined; + if (params.m_OutputDetailsOnlyToStdOut) + { + profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly; + } + else if (params.m_OutputDetailsToStdOut) + { + profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents; + } + + INetworkProperties networkProperties{m_Params.m_Concurrent, + MemorySource::Undefined, + MemorySource::Undefined, + params.m_EnableProfiling, + profilingDetailsMethod}; + + m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties); + + if (m_Params.m_Iterations > 1) + { + std::stringstream msg; + msg << "Network will be executed " << m_Params.m_Iterations; + if (m_Params.m_Concurrent) + { + msg << " times in an asynchronous manner. "; + } + else + { + msg << " times successively. "; + } + msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " + "cover each execution."; + ARMNN_LOG(info) << msg.str(); + } + + if (m_Params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + + if (m_Params.m_DontPrintOutputs) + { + ARMNN_LOG(info) << "Printing outputs to console is disabled."; + } +} + +void ArmNNExecutor::ExecuteAsync() +{ + std::vector> memHandles; + std::unique_ptr threadpool; + armnn::AsyncCallbackManager callbackManager; + std::unordered_map inferenceOutputMap; + + for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i) + { + memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId)); + } + + threadpool = std::make_unique(m_Params.m_ThreadPoolSize, + m_Runtime.get(), + memHandles); + + ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... 
\n"; + // Declare the latest and earliest inference times here to be used when calculating overall time + std::chrono::high_resolution_clock::time_point earliestStartTime = + std::chrono::high_resolution_clock::time_point::max(); + std::chrono::high_resolution_clock::time_point latestEndTime = + std::chrono::high_resolution_clock::now(); + + // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the + // LoadedNetwork with each scheduled inference having a specific priority + for (size_t i = 0; i < m_Params.m_Iterations; ++i) + { + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + + std::shared_ptr cb = callbackManager.GetNewCallback(); + inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]}); + threadpool->Schedule(m_NetworkId, + m_InputTensorsVec[i], + m_OutputTensorsVec[i], + armnn::QosExecPriority::Medium, + cb); + } + + // Check the results + for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration) + { + auto cb = callbackManager.GetNotifiedCallback(); + + // Get the results + if (earliestStartTime > cb->GetStartTime()) + { + earliestStartTime = cb->GetStartTime(); + } + if (latestEndTime < cb->GetEndTime()) + { + latestEndTime = cb->GetEndTime(); + } + + auto startTime = time_point_cast(cb->GetStartTime()); + auto endTime = time_point_cast(cb->GetEndTime()); + auto inferenceDuration = endTime - startTime; + CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime); + if(!m_Params.m_DontPrintOutputs) + { + const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()]; + PrintOutputTensors(out, iteration); + } + } + + // Print duration difference between overallStartTime and overallEndTime + auto overallEndTime = time_point_cast(latestEndTime); + auto overallStartTime = time_point_cast(earliestStartTime); + auto totalInferenceDuration = overallEndTime - overallStartTime; + ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2) + << std::fixed << totalInferenceDuration.count() << " ms\n"; + +} + +void ArmNNExecutor::ExecuteSync() +{ + for (size_t x = 0; x < m_Params.m_Iterations; x++) + { + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + + const auto start_time = armnn::GetTimeNow(); + armnn::Status ret; + if (m_Params.m_ImportInputsIfAligned) + { + ret = m_Runtime->EnqueueWorkload(m_NetworkId, + m_InputTensorsVec[x], + m_OutputTensorsVec[x], + m_ImportedInputIds[x], + m_ImportedOutputIds[x]); + } + else + { + ret = m_Runtime->EnqueueWorkload(m_NetworkId, + m_InputTensorsVec[x], + m_OutputTensorsVec[x]); + } + + const auto inferenceDuration = armnn::GetTimeDuration(start_time); + + // If profiling is enabled print out the results + if(profiler && profiler->IsProfilingEnabled()) + { + profiler->Print(std::cout); + } + + if(ret == armnn::Status::Failure) + { + throw armnn::Exception("IRuntime::EnqueueWorkload failed"); + } + + if(!m_Params.m_DontPrintOutputs) + { + PrintOutputTensors(&m_OutputTensorsVec[x], x); + } + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime); + } +} + +std::vector ArmNNExecutor::Execute() +{ + if(m_Params.m_ThreadPoolSize == 0) + { + ExecuteSync(); + } + else + { + ExecuteAsync(); + } + std::vector results; + for (auto& output : m_OutputStorage) + { + results.push_back(output.m_Mem); + } + + return results; +} + +void ArmNNExecutor::PrintNetworkInfo() +{ + const std::vector& inputNames = 
m_Params.m_InputNames.size() != 0 ? + m_Params.m_InputNames : + m_IOInfo.m_InputNames; + std::stringstream ss; + ss << "===== Network Info =====\n"; + ss << "Inputs in order:\n"; + for (const auto& inputName : inputNames) + { + const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second; + ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType()); + if (inputInfo.IsQuantized()) + { + ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset(); + if (inputInfo.HasMultipleQuantizationScales()) + { + ss << " Quantization scales: "; + for (const auto scale: inputInfo.GetQuantizationScales()) + { + ss << scale << ", "; + } + } + else + { + ss << " Quantization scale: " << inputInfo.GetQuantizationScale(); + } + } + ss << "\n"; + } + + ss << "Outputs in order:\n"; + for (const auto& outputName : m_IOInfo.m_OutputNames) + { + const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second; + ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType()); + if (outputInfo.IsQuantized()) + { + ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset(); + if (outputInfo.HasMultipleQuantizationScales()) + { + ss << " Quantization scales: "; + for (const auto scale: outputInfo.GetQuantizationScales()) + { + ss << scale << ", "; + } + } + else + { + ss << " Quantization scale: " << outputInfo.GetQuantizationScale(); + } + } + ss << "\n"; + } + + std::cout << ss.str() << std::endl; +} + +void ArmNNExecutor::SetupInputsAndOutputs() +{ + const unsigned int noOfInputs = m_IOInfo.m_InputNames.size(); + + if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs) + { + LogAndThrow("Number of input names does not match number of inputs"); + } + + const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size(); + const std::vector& inputNames = m_Params.m_InputNames.size() != 0 ? + m_Params.m_InputNames : + m_IOInfo.m_InputNames; + unsigned int noInputSets = 1; + + if (inputFilePaths != 0) + { + if (inputFilePaths % noOfInputs != 0) + { + LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) + + " not compatible with number of inputs: " + std::to_string(noOfInputs)); + } + noInputSets = inputFilePaths / noOfInputs; + if (noInputSets != 1 && m_Params.m_ReuseBuffers) + { + LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers"); + } + } + + const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size(); + const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size(); + unsigned int noOutputSets = 1; + + if (outputFilePaths != 0) + { + if (outputFilePaths % noOfOutputs != 0) + { + LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) + + ", not compatible with number of outputs: " + std::to_string(noOfOutputs)); + } + noOutputSets = outputFilePaths / noOfOutputs; + + if (noOutputSets != 1 && m_Params.m_ReuseBuffers) + { + LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers"); + } + } + + if (m_Params.m_ThreadPoolSize != 0) + { + // The current implementation of the Threadpool does not allow binding of outputs to a thread + // So to ensure no two threads write to the same output at the same time, no output can be reused + noOutputSets = m_Params.m_Iterations; + } + + if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs) + { + ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required " + << "for each input. 
The user provided " + << m_Params.m_InputTensorDataFilePaths.size() + << " input-tensor-data file/s which will be used to fill the input/s.\n"; + } + + unsigned int inputCount = 0; + for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet) + { + armnn::InputTensors inputTensors; + for (const auto& inputName: inputNames) + { + armnn::BindingPointInfo bindingPointInfo; + try + { + bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName); + } + catch (const std::out_of_range& e) + { + LogAndThrow("Input with inputName: " + inputName + " not found."); + } + + const armnn::TensorInfo& tensorInfo = bindingPointInfo.second; + auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(), + tensorInfo.GetQuantizationScale(), + tensorInfo.GetQuantizationOffset(), + true}; + + m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()}); + + const int bindingId = bindingPointInfo.first; + inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem}); + + const armnn::Optional dataFile = m_Params.m_GenerateTensorData ? + armnn::EmptyOptional() : + armnn::MakeOptional( + m_Params.m_InputTensorDataFilePaths.at(inputCount++)); + + switch (tensorInfo.GetDataType()) + { + case armnn::DataType::Float32: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::QSymmS16: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::QSymmS8: + case armnn::DataType::QAsymmS8: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::QAsymmU8: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::Signed32: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + default: + { + LogAndThrow("Unexpected DataType"); + } + } + + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedInputIds.push_back( + m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc)); + } + } + m_InputTensorsVec.emplace_back(inputTensors); + } + + for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet) + { + armnn::OutputTensors outputTensors; + for (const auto& output: m_IOInfo.m_OutputInfoMap) + { + const armnn::BindingPointInfo& bindingPointInfo = output.second; + const armnn::TensorInfo& tensorInfo = bindingPointInfo.second; + + m_OutputStorage.emplace_back(tensorInfo.GetNumBytes()); + outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem}); + } + m_OutputTensorsVec.emplace_back(outputTensors); + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedOutputIds.push_back( + m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc)); + } + } + + // Fill the remaining iterations with copies + const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets; + for (unsigned int i = 1; i <= remainingInputSets; i++) + { + 
m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]); + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]); + } + } + + const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets; + for (unsigned int i = 1; i <= remainingOutputSets; i++) + { + m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]); + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]); + } + } +} + +ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet) +{ + struct IOStrategy : armnn::IStrategy + { + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const armnn::BaseDescriptor& descriptor, + const std::vector& constants, + const char* name, + const armnn::LayerBindingId id = 0) override + { + armnn::IgnoreUnused(descriptor, constants, id); + switch (layer->GetType()) + { + case armnn::LayerType::Input: + { + m_IOInfo.m_InputNames.emplace_back(name); + m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()}; + break; + } + case armnn::LayerType::Output: + { + m_IOInfo.m_OutputNames.emplace_back(name); + m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()}; + break; + } + default: {} + } + } + IOInfo m_IOInfo; + }; + + IOStrategy ioStrategy; + optNet->ExecuteStrategy(ioStrategy); + + return ioStrategy.m_IOInfo; +} + +armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network) +{ + armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}}; + + armnn::OptimizerOptions options; + options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode; + options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode; + options.m_Debug = m_Params.m_PrintIntermediate; + options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ? + armnn::ShapeInferenceMethod::InferAndValidate : + armnn::ShapeInferenceMethod::ValidateOnly; + options.m_ProfilingEnabled = m_Params.m_EnableProfiling; + + armnn::BackendOptions gpuAcc("GpuAcc", + { + { "FastMathEnabled", m_Params.m_EnableFastMath }, + { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork }, + { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath }, + { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath } + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + { "FastMathEnabled", m_Params.m_EnableFastMath }, + { "NumberOfThreads", m_Params.m_NumberOfThreads } + }); + options.m_ModelOptions.push_back(gpuAcc); + options.m_ModelOptions.push_back(cpuAcc); + + const auto optimization_start_time = armnn::GetTimeNow(); + optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options); + + ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n"; + + if (!optNet) + { + LogAndThrow("Optimize returned nullptr"); + } + + return optNet; +} + +std::unique_ptr ArmNNExecutor::CreateParser() +{ + // If no model format is given check the file name + const std::string& modelFormat = m_Params.m_ModelPath; + + m_Params.m_IsModelBinary = modelFormat.find("json") == std::string::npos ? 
true : false; + std::unique_ptr parser = nullptr; + // Forward to implementation based on the parser type + if (modelFormat.find("armnn") != std::string::npos) + { +#if defined(ARMNN_SERIALIZER) + parser = std::make_unique(); +#else + LogAndThrow("Not built with serialization support."); +#endif + } + else if(modelFormat.find("tflite") != std::string::npos) + { +#if defined(ARMNN_TF_LITE_PARSER) + parser = std::make_unique(m_Params); +#else + LogAndThrow("Not built with Tensorflow-Lite parser support."); +#endif + } + else if (modelFormat.find("onnx") != std::string::npos) + { +#if defined(ARMNN_ONNX_PARSER) + parser = std::make_unique(); +#else + LogAndThrow("Not built with Onnx parser support."); +#endif + } + + return parser; +} + +void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors, + unsigned int iteration) +{ + auto findOutputName = [&](const armnn::LayerBindingId id) + { + for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it) + { + if (id == it->second.first) + { + return it->first; + } + } + return std::string{}; + }; + + unsigned int outputIndex = 0; + unsigned int numOutputs = outputTensors->size(); + for (const auto& output: *outputTensors) + { + const auto bindingName = findOutputName(output.first); + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the result + // of the last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = iteration * numOutputs + outputIndex; + if (!m_Params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '" + << output.first + << "' of iteration: " << iteration + 1 << " to file: '" + << m_Params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + + const armnn::Optional outputTensorFile = m_Params.m_OutputTensorFiles.empty() ? 
+ armnn::EmptyOptional() : + armnn::MakeOptional( + m_Params.m_OutputTensorFiles[outputFileIndex]); + + OutputWriteInfo outputWriteInfo + { + outputTensorFile, + bindingName, + output.second, + !m_Params.m_DontPrintOutputs + }; + + std::cout << bindingName << ": "; + std::vector values; + switch (output.second.GetDataType()) + { + case armnn::DataType::Float32: + { + PrintTensor(outputWriteInfo, "%f "); + break; + } + + case armnn::DataType::Signed32: + { + PrintTensor(outputWriteInfo, "%d "); + break; + } + case armnn::DataType::QSymmS8: + case armnn::DataType::QAsymmS8: + { + PrintTensor(outputWriteInfo, "%d "); + break; + } + case armnn::DataType::QAsymmU8: + { + PrintTensor(outputWriteInfo, "%d "); + break; + } + case armnn::DataType::Float16: + case armnn::DataType::QSymmS16: + case armnn::DataType::BFloat16: + case armnn::DataType::Boolean: + case armnn::DataType::Signed64: + default: + { + LogAndThrow("Unexpected DataType"); + } + } + std::cout << "\n"; + } +} + +void ArmNNExecutor::CompareAndPrintResult(std::vector otherOutput) +{ + unsigned int index = 0; + + for (const auto& outputTensors: m_OutputTensorsVec) + { + for (const auto& outputTensor: outputTensors) + { + float result = 0; + size_t size = outputTensor.second.GetNumBytes(); + + switch (outputTensor.second.GetDataType()) + { + case armnn::DataType::Float32: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + case armnn::DataType::QSymmS16: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + case armnn::DataType::QSymmS8: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + case armnn::DataType::QAsymmU8: + case armnn::DataType::QAsymmS8: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + default: + { + LogAndThrow("Unexpected DataType"); + } + } + std::cout << "RMSE: of " << result << "\n"; + } + } +} +#if defined(ARMNN_SERIALIZER) +ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){} + +armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params) +{ + const std::string& modelPath = params.m_ModelPath; + + std::ifstream file(modelPath, std::ios::binary); + return m_Parser->CreateNetworkFromBinary(file); +} + +armnn::BindingPointInfo +ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName) +{ + armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName); + return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo}; +} + +armnn::BindingPointInfo +ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName) +{ + armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName); + return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo}; +} +#endif + +#if defined(ARMNN_TF_LITE_PARSER) +ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params) +{ + armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options; + options.m_StandInLayerForUnsupported = params.m_ParseUnsupported; + options.m_InferAndValidate = params.m_InferOutputShape; + + m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options); +} + +armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params) +{ + 
const std::string& modelPath = params.m_ModelPath; + return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str()); +} + +armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId, + const std::string& inputName) +{ + return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName); +} + +armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId, + const std::string& outputName) +{ + return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName); +} +#endif + + +#if defined(ARMNN_ONNX_PARSER) +ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){} + +armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params) +{ + const std::string& modelPath = params.m_ModelPath; + m_Parser = armnnOnnxParser::IOnnxParser::Create(); + std::map inputShapes; + if(!params.m_InputTensorShapes.empty()) + { + const size_t numInputShapes = params.m_InputTensorShapes.size(); + const size_t numInputBindings = params.m_InputNames.size(); + if(numInputShapes < numInputBindings) + { + throw armnn::Exception( + fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}", + numInputBindings, numInputShapes)); + } + + for (size_t i = 0; i < numInputShapes; i++) + { + inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i]; + } + + return params.m_IsModelBinary ? + m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) : + m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes); + } + + // Handle text and binary input differently by calling the corresponding parser function + return params.m_IsModelBinary ? + m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) : + m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str()); +} + +armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName) +{ + return m_Parser->GetNetworkInputBindingInfo(inputName); +} + +armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName) +{ + return m_Parser->GetNetworkOutputBindingInfo(outputName); +} +#endif diff --git a/tests/ExecuteNetwork/ArmNNExecutor.hpp b/tests/ExecuteNetwork/ArmNNExecutor.hpp new file mode 100644 index 0000000000..c4adc9e120 --- /dev/null +++ b/tests/ExecuteNetwork/ArmNNExecutor.hpp @@ -0,0 +1,161 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "IExecutor.hpp" +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" +#include "ExecuteNetworkProgramOptions.hpp" +#include "armnn/utility/NumericCast.hpp" +#include "armnn/utility/Timer.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#if defined(ARMNN_SERIALIZER) +#include "armnnDeserializer/IDeserializer.hpp" +#endif +#if defined(ARMNN_TF_LITE_PARSER) +#include +#endif +#if defined(ARMNN_ONNX_PARSER) +#include +#endif + +class ArmNNExecutor : public IExecutor +{ +public: + ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions); + + std::vector Execute() override; + void PrintNetworkInfo() override; + void CompareAndPrintResult(std::vector otherOutput) override; + +private: + + struct IParser; + struct IOInfo; + struct IOStorage; + + using BindingPointInfo = armnn::BindingPointInfo; + + std::unique_ptr CreateParser(); + + void ExecuteAsync(); + void ExecuteSync(); + void SetupInputsAndOutputs(); + + IOInfo GetIOInfo(armnn::IOptimizedNetwork* optNet); + + void PrintOutputTensors(const armnn::OutputTensors* outputTensors, unsigned int iteration); + + armnn::IOptimizedNetworkPtr OptimizeNetwork(armnn::INetwork* network); + + struct IOStorage + { + IOStorage(size_t size) + { + m_Mem = operator new(size); + } + ~IOStorage() + { + operator delete(m_Mem); + } + IOStorage(IOStorage&& rhs) + { + this->m_Mem = rhs.m_Mem; + rhs.m_Mem = nullptr; + } + + IOStorage(const IOStorage& rhs) = delete; + IOStorage& operator=(IOStorage& rhs) = delete; + IOStorage& operator=(IOStorage&& rhs) = delete; + + void* m_Mem; + }; + + struct IOInfo + { + std::vector m_InputNames; + std::vector m_OutputNames; + std::map m_InputInfoMap; + std::map m_OutputInfoMap; + }; + + IOInfo m_IOInfo; + std::vector m_InputStorage; + std::vector m_OutputStorage; + std::vector m_InputTensorsVec; + std::vector m_OutputTensorsVec; + std::vector> m_ImportedInputIds; + std::vector> m_ImportedOutputIds; + std::shared_ptr m_Runtime; + armnn::NetworkId m_NetworkId; + ExecuteNetworkParams m_Params; + + struct IParser + { + virtual armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) = 0; + virtual armnn::BindingPointInfo GetInputBindingPointInfo(size_t id, const std::string& inputName) = 0; + virtual armnn::BindingPointInfo GetOutputBindingPointInfo(size_t id, const std::string& outputName) = 0; + + virtual ~IParser(){}; + }; + +#if defined(ARMNN_SERIALIZER) + class ArmNNDeserializer : public IParser + { + public: + ArmNNDeserializer(); + + armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) override; + armnn::BindingPointInfo GetInputBindingPointInfo(size_t, const std::string& inputName) override; + armnn::BindingPointInfo GetOutputBindingPointInfo(size_t, const std::string& outputName) override; + + private: + armnnDeserializer::IDeserializerPtr m_Parser; + }; +#endif + +#if defined(ARMNN_TF_LITE_PARSER) + class TfliteParser : public IParser + { + public: + TfliteParser(const ExecuteNetworkParams& params); + + armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) override; + armnn::BindingPointInfo GetInputBindingPointInfo(size_t subgraphId, const std::string& inputName) override; + armnn::BindingPointInfo GetOutputBindingPointInfo(size_t subgraphId, const std::string& outputName) override; + + private: + armnnTfLiteParser::ITfLiteParserPtr m_Parser{nullptr, [](armnnTfLiteParser::ITfLiteParser*){}}; + }; +#endif + 
+#if defined(ARMNN_ONNX_PARSER) + class OnnxParser : public IParser + { + public: + OnnxParser(); + + armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) override; + armnn::BindingPointInfo GetInputBindingPointInfo(size_t subgraphId, const std::string& inputName) override; + armnn::BindingPointInfo GetOutputBindingPointInfo(size_t subgraphId, const std::string& outputName) override; + + private: + armnnOnnxParser::IOnnxParserPtr m_Parser; + }; +#endif +}; \ No newline at end of file diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index 02a21c30cf..e9ebd0db8e 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -1,1077 +1,91 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // -#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" #include "ExecuteNetworkProgramOptions.hpp" -#include -#include - -#include -#include -#include -#include -#include - -#if defined(ARMNN_SERIALIZER) -#include "armnnDeserializer/IDeserializer.hpp" -#endif -#if defined(ARMNN_TF_LITE_PARSER) -#include "armnnTfLiteParser/ITfLiteParser.hpp" -#endif -#if defined(ARMNN_ONNX_PARSER) -#include "armnnOnnxParser/IOnnxParser.hpp" -#endif +#include "ArmNNExecutor.hpp" #if defined(ARMNN_TFLITE_DELEGATE) -#include -#include - -#include -#include -#include -#include -#include -#include -#include +#include "TfliteExecutor.hpp" #endif +#include -#include -/** - * Given a measured duration and a threshold time tell the user whether we succeeded or not. - * - * @param duration the measured inference duration. - * @param thresholdTime the threshold time in milliseconds. - * @return false if the measured time exceeded the threshold. 
- */ -bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, - const double& thresholdTime) +std::unique_ptr BuildExecutor(ProgramOptions& programOptions) { - ARMNN_LOG(info) << "Inference time: " << std::setprecision(2) - << std::fixed << duration.count() << " ms\n"; - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (thresholdTime != 0.0) + if (programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || + programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << thresholdTime << " ms"; - auto thresholdMinusInference = thresholdTime - duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - return false; - } - } - return true; -} - #if defined(ARMNN_TFLITE_DELEGATE) -int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions) -{ - // Build model and corresponding interpreter - using namespace tflite; - - std::unique_ptr model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str()); - - auto tfLiteInterpreter = std::make_unique(); - tflite::ops::builtin::BuiltinOpResolver resolver; - - tflite::InterpreterBuilder builder(*model, resolver); - builder(&tfLiteInterpreter); - tfLiteInterpreter->AllocateTensors(); - - int status = 0; - - // Create & populate Armnn Delegate, then register it to TfLiteInterpreter - if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) - { - // Create the Armnn Delegate - // Populate a DelegateOptions from the ExecuteNetworkParams. - armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions(); - delegateOptions.SetExternalProfilingParams( - arm::pipe::ConvertExternalProfilingOptions(runtimeOptions.m_ProfilingOptions)); - - std::unique_ptr - theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), - armnnDelegate::TfLiteArmnnDelegateDelete); - // Register armnn_delegate to TfLiteInterpreter - status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); - if (status != kTfLiteOk) - { - ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!"; - return EXIT_FAILURE; - } - } - else - { - std::cout << "Running on TfLite without ArmNN delegate\n"; - } - - const size_t numInputs = params.m_InputNames.size(); - // Populate input tensor of interpreter - for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) - { - // Load (or generate) input data for inference - armnn::Optional dataFile = params.m_GenerateTensorData ? 
armnn::EmptyOptional() : - armnn::MakeOptional(params.m_InputTensorDataFilePaths[inputIndex]); - - int input = tfLiteInterpreter->inputs()[inputIndex]; - TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims; - - unsigned int inputSize = 1; - if (params.m_InputTensorShapes.size() > 0) - { - inputSize = params.m_InputTensorShapes[inputIndex]->GetNumElements(); - } - else - { - for (unsigned int dim = 0; dim < static_cast(inputDims->size); ++dim) - { - inputSize *= inputDims->data[dim]; - } - } - - if (params.m_InputTypes[inputIndex].compare("float") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return std::stof(s); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0 || - params.m_InputTypes[inputIndex].compare("qasymms8") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return armnn::numeric_cast(std::stoi(s)); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("int") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return std::stoi(s); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 || - params.m_InputTypes[inputIndex].compare("qasymmu8") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return armnn::numeric_cast(std::stoi(s)); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else - { - ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". "; - return EXIT_FAILURE; - } - } - - // Run inference, print the output of the inference - for (size_t x = 0; x < params.m_Iterations; x++) - { - // Start timer to record inference time in milliseconds. - const auto start_time = armnn::GetTimeNow(); - // Run the inference - status = tfLiteInterpreter->Invoke(); - const auto duration = armnn::GetTimeDuration(start_time); - - // The TFLite interpreter's outputs might be in a different order than the user inputted output names. 
- std::map paramToTfliteOutputIndex; - for (unsigned int paramIndex = 0; paramIndex < params.m_OutputNames.size(); ++paramIndex) - { - paramToTfliteOutputIndex[paramIndex] = -1; - for (unsigned int tfLiteIndex = 0; tfLiteIndex < tfLiteInterpreter->outputs().size(); ++tfLiteIndex) - { - if (params.m_OutputNames[paramIndex] == tfLiteInterpreter->GetOutputName(tfLiteIndex)) - { - paramToTfliteOutputIndex[paramIndex] = tfLiteIndex; - } - } - } - - // Print out the output - for (unsigned int paramOutputIndex = 0; paramOutputIndex < params.m_OutputNames.size(); ++paramOutputIndex) - { - int outputIndex = paramToTfliteOutputIndex[paramOutputIndex]; - if (outputIndex == -1) - { - std::cout << fmt::format("Output name: {} doesn't exist.", params.m_OutputNames[paramOutputIndex]) << - std::endl; - continue; - } - auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; - TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; - // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. - FILE* outputTensorFile = stdout; - if (!params.m_OutputTensorFiles.empty()) - { - outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w"); - if (outputTensorFile == NULL) - { - ARMNN_LOG(fatal) << "Specified output tensor file, \"" << - params.m_OutputTensorFiles[outputIndex] << - "\", cannot be created. Defaulting to stdout. " << - "Error was: " << std::strerror(errno); - outputTensorFile = stdout; - } - else - { - ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" - << params.m_OutputTensorFiles[outputIndex] << "'"; - } - } - long outputSize = 1; - for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) - { - outputSize *= outputDims->data[dim]; - } - - std::cout << tfLiteInterpreter->GetOutputName(outputIndex) << ": "; - if (params.m_OutputTypes[paramOutputIndex].compare("float") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); - } - } - } - else if (params.m_OutputTypes[paramOutputIndex].compare("int") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - } - } - else if (params.m_OutputTypes[paramOutputIndex].compare("qsymms8") == 0 || - params.m_OutputTypes[paramOutputIndex].compare("qasymms8") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - } - } - else if 
(params.m_OutputTypes[paramOutputIndex].compare("qasymm8") == 0 || - params.m_OutputTypes[paramOutputIndex].compare("qasymmu8") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); - } - } - } - else - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << - "\" may be incorrect. Output type can be specified with -z argument"; - return EXIT_FAILURE; - } - std::cout << std::endl; - } - CheckInferenceTimeThreshold(duration, params.m_ThresholdTime); - } - - return status; -} + return std::make_unique(programOptions.m_ExNetParams); +#else + ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; + return nullptr; #endif -template -int MainImpl(const ExecuteNetworkParams& params, - const std::shared_ptr& runtime = nullptr) -{ - using namespace std::chrono; - - std::vector> inputs; - std::vector> outputs; - - try - { - // Creates an InferenceModel, which will parse the model and load it into an IRuntime. - typename InferenceModel::Params inferenceModelParams; - inferenceModelParams.m_ModelPath = params.m_ModelPath; - inferenceModelParams.m_AllowExpandedDims = params.m_AllowExpandedDims; - inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary; - inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices; - inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath; - inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate; - inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails; - inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported; - inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape; - inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath; - inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork; - inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath; - inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads; - inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath; - inferenceModelParams.m_AsyncEnabled = params.m_Concurrent; - inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize; - inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut; - inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut; - inferenceModelParams.m_ImportInputsIfAligned = params.m_ImportInputsIfAligned; - - for(const std::string& inputName: params.m_InputNames) - { - inferenceModelParams.m_InputBindings.push_back(inputName); - } - - for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i) - { - inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]); - } - - for(const std::string& outputName: params.m_OutputNames) - { - inferenceModelParams.m_OutputBindings.push_back(outputName); - } - - inferenceModelParams.m_SubgraphId = params.m_SubgraphId; - inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode; - inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode; - - InferenceModel model(inferenceModelParams, - params.m_EnableProfiling, - 
params.m_DynamicBackendsPath, - runtime); - - const size_t numInputs = inferenceModelParams.m_InputBindings.size(); - - armnn::Optional qParams = params.m_QuantizeInput ? - armnn::MakeOptional( - model.GetInputQuantizationParams()) : - armnn::EmptyOptional(); - - if (params.m_InputTensorDataFilePaths.size() > numInputs) - { - ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required " - << "for each input. The user provided " - << params.m_InputTensorDataFilePaths.size() - << " input-tensor-data file/s which will be used to fill the input/s.\n"; - } - - const size_t numOutputs = inferenceModelParams.m_OutputBindings.size(); - - // The user is allowed to specify the data type of each output tensor. It is used here to construct the - // result tensors for each iteration. It is possible for the user to specify a type that does not match - // the data type of the corresponding model output. It may not make sense, but it is historically allowed. - // The potential problem here is a buffer overrun when a larger data type is written into the space for a - // smaller one. Issue a warning to highlight the potential problem. - for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx) - { - armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType(); - switch (type) - { - // --output-type only supports float, int, qasymms8 or qasymmu8. - case armnn::DataType::Float32: - if (params.m_OutputTypes[outputIdx].compare("float") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::QAsymmU8: - if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::Signed32: - if (params.m_OutputTypes[outputIdx].compare("int") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::QAsymmS8: - if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - default: - break; - } - } - - if (!params.m_ReuseBuffers) - { - for (unsigned int j = 0; j < params.m_Iterations; ++j) - { - std::vector inputDataContainers; - for (unsigned int i = 0; i < numInputs; ++i) - { - // If there are fewer input files given than required for the execution of - // params.m_Iterations we simply start with the first input file again - size_t inputFileIndex = j * numInputs + i; - if (!params.m_InputTensorDataFilePaths.empty()) - { - inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); - } - - armnn::Optional dataFile = params.m_GenerateTensorData ? 
- armnn::EmptyOptional() : - armnn::MakeOptional( - params.m_InputTensorDataFilePaths.at( - inputFileIndex)); - - unsigned int numElements = model.GetInputSize(i); - if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) - { - // If the user has provided a tensor shape for the current input, - // override numElements - numElements = params.m_InputTensorShapes[i]->GetNumElements(); - } - - armnnUtils::TContainer tensorData; - PopulateTensorWithData(tensorData, - numElements, - params.m_InputTypes[i], - qParams, - dataFile); - - inputDataContainers.push_back(tensorData); - } - inputs.push_back(inputDataContainers); - } - - for (unsigned int j = 0; j < params.m_Iterations; ++j) - { - std::vector outputDataContainers; - for (unsigned int i = 0; i < numOutputs; ++i) - { - if (params.m_OutputTypes[i].compare("float") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("int") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || - params.m_OutputTypes[i].compare("qasymmu8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("qasymms8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else - { - ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; - return EXIT_FAILURE; - } - } - outputs.push_back(outputDataContainers); - } - } - if (params.m_Iterations > 1) - { - std::stringstream msg; - msg << "Network will be executed " << params.m_Iterations; - if (params.m_Concurrent) - { - msg << " times in an asynchronous manner. "; - } - else - { - msg << " times successively. "; - } - msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " - "cover each execution."; - ARMNN_LOG(info) << msg.str(); - } - - // Synchronous execution - if (!params.m_Concurrent && !params.m_ReuseBuffers) - { - for (size_t x = 0; x < params.m_Iterations; x++) - { - // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(inputs[x], outputs[x]); - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - const armnn::TensorInfo& infoOut = infosOut[i].second; - - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the result - // of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = x * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << x+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, outputs[x][i]); - } - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference_duration.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - } - } - } - } - // Synchronous Execution using a single buffer for input and output data - else if(!params.m_Concurrent) - { - std::vector input; - std::vector output; - - for (unsigned int i = 0; i < numInputs; ++i) - { - // If there are fewer input files given than required for the execution of - // params.m_Iterations we simply start with the first input file again - size_t inputFileIndex = numInputs + i; - if (!params.m_InputTensorDataFilePaths.empty()) - { - inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); - } - - armnn::Optional dataFile = params.m_GenerateTensorData ? - armnn::EmptyOptional() : - armnn::MakeOptional( - params.m_InputTensorDataFilePaths.at( - inputFileIndex)); - - unsigned int numElements = model.GetInputSize(i); - if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) - { - // If the user has provided a tensor shape for the current input, - // override numElements - numElements = params.m_InputTensorShapes[i]->GetNumElements(); - } - - armnnUtils::TContainer tensorData; - PopulateTensorWithData(tensorData, - numElements, - params.m_InputTypes[i], - qParams, - dataFile); - - input.push_back(tensorData); - } - - for (unsigned int i = 0; i < numOutputs; ++i) - { - if (params.m_OutputTypes[i].compare("float") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("int") == 0) { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || - params.m_OutputTypes[i].compare("qasymmu8") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("qasymms8") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else { - ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; - return EXIT_FAILURE; - } - } - - std::vector> timings; - timings.reserve(params.m_Iterations); - for (size_t x = 0; x < params.m_Iterations; x++) - { - // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(input, output); - timings.push_back(inference_duration); - } - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output. 
This only needs to happen once as input is the same for each iteration. - const auto &infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - const armnn::TensorInfo &infoOut = infosOut[i].second; - - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the result - // of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] <<" to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? "" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, output[i]); - } - - for(auto inference: timings) - { - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - } - } - } - } - - // Asynchronous execution using the Arm NN thread pool - else if (params.m_ThreadPoolSize >= 1) - { - try - { - ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... 
\n"; - armnn::AsyncCallbackManager callbackManager; - std::unordered_map&> inferenceOutputMap; - - // Declare the latest and earliest inference times here to be used when calculating overall time - std::chrono::high_resolution_clock::time_point earliestStartTime; - std::chrono::high_resolution_clock::time_point latestEndTime = - std::chrono::high_resolution_clock::now(); - - // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the - // LoadedNetwork with each scheduled inference having a specific priority - for (size_t i = 0; i < params.m_Iterations; ++i) - { - std::shared_ptr cb = callbackManager.GetNewCallback(); - inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]}); - model.RunAsync(inputs[i], outputs[i], cb); - } - - // Check the results - unsigned int j = 0; - for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration) - { - auto cb = callbackManager.GetNotifiedCallback(); - - // Get the results - auto endTime = time_point_cast(cb->GetEndTime()); - auto startTime = time_point_cast(cb->GetStartTime()); - auto inferenceDuration = endTime - startTime; - - if (latestEndTime < cb->GetEndTime()) - { - latestEndTime = cb->GetEndTime(); - } - - if (earliestStartTime.time_since_epoch().count() == 0) - { - earliestStartTime = cb->GetStartTime(); - } - else if (earliestStartTime > cb->GetStartTime()) - { - earliestStartTime = cb->GetStartTime(); - } - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the - // result of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = iteration * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << iteration+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - - const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]); - } - - CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); - ++j; - } - //print duration difference between overallStartTime and overallEndTime - auto overallEndTime = time_point_cast(latestEndTime); - auto overallStartTime = time_point_cast(earliestStartTime); - auto totalInferenceDuration = overallEndTime - overallStartTime; - ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) - << std::fixed << totalInferenceDuration.count() << " ms\n"; - } - catch (const armnn::Exception& e) - { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; - } - } - // Asynchronous execution using std::launch::async - else - { - try - { - ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n"; - std::vector>>> inferenceResults; - inferenceResults.reserve(params.m_Iterations); - - // Create WorkingMemHandles for each inference - std::vector> workingMemHandles; - workingMemHandles.reserve(params.m_Iterations); - for (unsigned int i = 0; i < params.m_Iterations; ++i) - { - workingMemHandles.push_back(model.CreateWorkingMemHandle()); - } - - // Run each inference in its own thread - // start a timer - const auto start_time = armnn::GetTimeNow(); - for (unsigned int i = 0; i < params.m_Iterations; ++i) - { - armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get(); - - inferenceResults.push_back(std::async( - std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() { - return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i); - } - )); - } - - // Check the results - for (unsigned int j = 0; j < inferenceResults.size(); ++j) - { - // Get the results - auto inferenceResult = inferenceResults[j].get(); - auto inferenceDuration = std::get<1>(inferenceResult); - auto inferenceID = std::get<0>(inferenceResult); - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the - // result of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = j * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << j+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, outputs[j][i]); - } - CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); - ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; - } - // finish timer - const auto duration = armnn::GetTimeDuration(start_time); - ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) - << std::fixed << duration.count() << " ms\n"; - } - catch (const armnn::Exception& e) - { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; - } - } } - catch (const armnn::Exception& e) + else { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; + return std::make_unique(programOptions.m_ExNetParams, programOptions.m_RuntimeOptions); } - - return EXIT_SUCCESS; } // MAIN int main(int argc, const char* argv[]) { // Configures logging for both the ARMNN library and this test program. - #ifdef NDEBUG +#ifdef NDEBUG armnn::LogSeverity level = armnn::LogSeverity::Info; - #else +#else armnn::LogSeverity level = armnn::LogSeverity::Debug; - #endif +#endif armnn::ConfigureLogging(true, true, level); // Get ExecuteNetwork parameters and runtime options from command line // This might throw an InvalidArgumentException if the user provided invalid inputs - ProgramOptions ProgramOptions; - try { - ProgramOptions.ParseOptions(argc, argv); - } catch (const std::exception &e){ - ARMNN_LOG(fatal) << e.what(); - return EXIT_FAILURE; + ProgramOptions programOptions; + try + { + programOptions.ParseOptions(argc, argv); } - - if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut || - ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut) - && !ProgramOptions.m_ExNetParams.m_EnableProfiling) + catch (const std::exception& e) { - ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details"; + ARMNN_LOG(fatal) << e.what(); return EXIT_FAILURE; } - std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat; + std::vector outputResults; - // Forward to implementation based on the parser type - if (modelFormat.find("armnn") != std::string::npos) - { - #if defined(ARMNN_SERIALIZER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with serialization support."; - return EXIT_FAILURE; - #endif - } - else if (modelFormat.find("onnx") != std::string::npos) + auto executor = BuildExecutor(programOptions); + if (!executor) { - #if defined(ARMNN_ONNX_PARSER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with Onnx parser support."; return EXIT_FAILURE; - #endif } - else if(modelFormat.find("tflite") != std::string::npos) + + executor->PrintNetworkInfo(); + outputResults = executor->Execute(); + + if (!programOptions.m_ExNetParams.m_ComparisonComputeDevices.empty() || + programOptions.m_ExNetParams.m_CompareWithTflite) { - if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser) + ExecuteNetworkParams comparisonParams = programOptions.m_ExNetParams; + comparisonParams.m_ComputeDevices = 
programOptions.m_ExNetParams.m_ComparisonComputeDevices; + + if (programOptions.m_ExNetParams.m_CompareWithTflite) { - #if defined(ARMNN_TF_LITE_PARSER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support."; - return EXIT_FAILURE; - #endif + comparisonParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter; } - else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == - ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || - ProgramOptions.m_ExNetParams.m_TfLiteExecutor == - ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) + + auto comparisonExecutor = BuildExecutor(programOptions); + + if (!comparisonExecutor) { - #if defined(ARMNN_TFLITE_DELEGATE) - return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions); - #else - ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; return EXIT_FAILURE; - #endif } - } - else - { - ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat - << "'. Please include 'tflite' or 'onnx'"; - return EXIT_FAILURE; + + comparisonExecutor->PrintNetworkInfo(); + comparisonExecutor->Execute(); + + comparisonExecutor->CompareAndPrintResult(outputResults); } } diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index cc75bb4323..f341c30738 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -1,76 +1,15 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ExecuteNetworkParams.hpp" #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" -#include #include #include - -bool IsModelBinary(const std::string& modelFormat) -{ - // Parse model binary flag from the model-format string we got from the command-line - if (modelFormat.find("binary") != std::string::npos) - { - return true; - } - else if (modelFormat.find("txt") != std::string::npos || modelFormat.find("text") != std::string::npos) - { - return false; - } - else - { - throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. " - "Please include 'binary' or 'text'", - modelFormat)); - } -} - -void CheckModelFormat(const std::string& modelFormat) -{ - // Forward to implementation based on the parser type - if (modelFormat.find("armnn") != std::string::npos) - { -#if defined(ARMNN_SERIALIZER) -#else - throw armnn::InvalidArgumentException("Can't run model in armnn format without a " - "built with serialization support."); -#endif - } - else if (modelFormat.find("onnx") != std::string::npos) - { -#if defined(ARMNN_ONNX_PARSER) -#else - throw armnn::InvalidArgumentException("Can't run model in onnx format without a " - "built with Onnx parser support."); -#endif - } - else if (modelFormat.find("tflite") != std::string::npos) - { -#if defined(ARMNN_TF_LITE_PARSER) - if (!IsModelBinary(modelFormat)) - { - throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. 
Only 'binary' " - "format supported for tflite files", - modelFormat)); - } -#elif defined(ARMNN_TFLITE_DELEGATE) -#else - throw armnn::InvalidArgumentException("Can't run model in tflite format without a " - "built with Tensorflow Lite parser support."); -#endif - } - else - { - throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. " - "Please include 'tflite' or 'onnx'", - modelFormat)); - } -} +#include void CheckClTuningParameter(const int& tuningLevel, const std::string& tuningPath, @@ -105,7 +44,6 @@ void CheckClTuningParameter(const int& tuningLevel, ARMNN_LOG(warning) << "To use Cl Tuning the compute device GpuAcc needs to be active."; } } - } void ExecuteNetworkParams::ValidateParams() @@ -120,7 +58,6 @@ void ExecuteNetworkParams::ValidateParams() << invalidBackends; } } - CheckClTuningParameter(m_TuningLevel, m_TuningPath, m_ComputeDevices); if (m_EnableBf16TurboMode && m_EnableFp16TurboMode) @@ -129,10 +66,6 @@ void ExecuteNetworkParams::ValidateParams() "enabled at the same time."); } - m_IsModelBinary = IsModelBinary(m_ModelFormat); - - CheckModelFormat(m_ModelFormat); - // Check input tensor shapes if ((m_InputTensorShapes.size() != 0) && (m_InputTensorShapes.size() != m_InputNames.size())) @@ -157,68 +90,6 @@ void ExecuteNetworkParams::ValidateParams() m_InputNames.size(), m_InputTensorDataFilePaths.size())); } - else if (m_InputTensorDataFilePaths.size() % m_InputNames.size() != 0) - { - throw armnn::InvalidArgumentException( - fmt::format("According to the number of input names the user provided the network has {} " - "inputs. The user specified {} input-tensor-data file paths which is not " - "divisible by the number of inputs.", - m_InputNames.size(), - m_InputTensorDataFilePaths.size())); - } - } - - if (m_InputTypes.size() == 0) - { - //Defaults the value of all inputs to "float" - m_InputTypes.assign(m_InputNames.size(), "float"); - } - else if ((m_InputTypes.size() != 0) && - (m_InputTypes.size() != m_InputNames.size())) - { - throw armnn::InvalidArgumentException("input-name and input-type must have the same amount of elements."); - } - - // Make sure that the number of input files given is divisible by the number of inputs of the model - if (!(m_InputTensorDataFilePaths.size() % m_InputNames.size() == 0)) - { - throw armnn::InvalidArgumentException( - fmt::format("The number of input-tensor-data files ({0}) is not divisible by the " - "number of inputs ({1} according to the number of input names).", - m_InputTensorDataFilePaths.size(), - m_InputNames.size())); - } - - if (m_OutputTypes.size() == 0) - { - //Defaults the value of all outputs to "float" - m_OutputTypes.assign(m_OutputNames.size(), "float"); - } - else if ((m_OutputTypes.size() != 0) && - (m_OutputTypes.size() != m_OutputNames.size())) - { - throw armnn::InvalidArgumentException("output-name and output-type must have the same amount of elements."); - } - - // Make sure that the number of output files given is equal to the number of outputs of the model - // or equal to the number of outputs of the model multiplied with the number of iterations - if (!m_OutputTensorFiles.empty()) - { - if ((m_OutputTensorFiles.size() != m_OutputNames.size()) && - (m_OutputTensorFiles.size() != m_OutputNames.size() * m_Iterations)) - { - std::stringstream errmsg; - auto numOutputs = m_OutputNames.size(); - throw armnn::InvalidArgumentException( - fmt::format("The user provided {0} output-tensor files. 
The only allowed number of output-tensor " - "files is the number of outputs of the network ({1} according to the number of " - "output names) or the number of outputs multiplied with the number of times the " - "network should be executed (NumOutputs * NumIterations = {1} * {2} = {3}).", - m_OutputTensorFiles.size(), - numOutputs, - m_Iterations, - numOutputs*m_Iterations)); - } } // Check that threshold time is not less than zero @@ -310,4 +181,5 @@ armnnDelegate::DelegateOptions ExecuteNetworkParams::ToDelegateOptions() const return delegateOptions; } + #endif diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index 5ef2b6ea7c..e60e3b8877 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -16,8 +16,6 @@ /// Check ExecuteNetworkProgramOptions.cpp for a description of each parameter struct ExecuteNetworkParams { - using TensorShapePtr = std::unique_ptr; - enum class TfLiteExecutor { ArmNNTfLiteParser, @@ -25,50 +23,48 @@ struct ExecuteNetworkParams TfliteInterpreter }; - bool m_AllowExpandedDims; - std::string m_CachedNetworkFilePath; - std::vector m_ComputeDevices; - bool m_Concurrent; - bool m_DequantizeOutput; - std::string m_DynamicBackendsPath; - bool m_EnableBf16TurboMode; - bool m_EnableFastMath = false; - bool m_EnableFp16TurboMode; - bool m_EnableLayerDetails = false; - bool m_EnableProfiling; - bool m_GenerateTensorData; - bool m_InferOutputShape = false; - bool m_EnableDelegate = false; - std::vector m_InputNames; - std::vector m_InputTensorDataFilePaths; - std::vector m_InputTensorShapes; - std::vector m_InputTypes; - bool m_IsModelBinary; - size_t m_Iterations; - std::string m_ModelFormat; - std::string m_ModelPath; - unsigned int m_NumberOfThreads; - bool m_OutputDetailsToStdOut; - bool m_OutputDetailsOnlyToStdOut; - std::vector m_OutputNames; - std::vector m_OutputTensorFiles; - std::vector m_OutputTypes; - bool m_ParseUnsupported = false; - bool m_PrintIntermediate; - bool m_DontPrintOutputs; - bool m_QuantizeInput; - bool m_SaveCachedNetwork; - size_t m_SimultaneousIterations; - size_t m_SubgraphId; - double m_ThresholdTime; - int m_TuningLevel; - std::string m_TuningPath; - std::string m_MLGOTuningFilePath; - TfLiteExecutor m_TfLiteExecutor; - size_t m_ThreadPoolSize; - bool m_ImportInputsIfAligned; - bool m_ReuseBuffers; - + bool m_AllowExpandedDims; + std::string m_CachedNetworkFilePath; + std::vector m_ComputeDevices; + bool m_Concurrent; + bool m_DequantizeOutput; + std::string m_DynamicBackendsPath; + bool m_EnableBf16TurboMode; + bool m_EnableFastMath = false; + bool m_EnableFp16TurboMode; + bool m_EnableLayerDetails = false; + bool m_EnableProfiling; + bool m_GenerateTensorData; + bool m_InferOutputShape = false; + bool m_EnableDelegate = false; + bool m_IsModelBinary; + std::vector m_InputNames; + std::vector m_InputTensorDataFilePaths; + std::vector m_InputTensorShapes; + size_t m_Iterations; + std::string m_ModelPath; + unsigned int m_NumberOfThreads; + bool m_OutputDetailsToStdOut; + bool m_OutputDetailsOnlyToStdOut; + std::vector m_OutputNames; + std::vector m_OutputTensorFiles; + bool m_ParseUnsupported = false; + bool m_PrintIntermediate; + bool m_DontPrintOutputs; + bool m_QuantizeInput; + bool m_SaveCachedNetwork; + size_t m_SubgraphId; + double 
m_ThresholdTime; + int m_TuningLevel; + std::string m_TuningPath; + std::string m_MLGOTuningFilePath; + TfLiteExecutor m_TfLiteExecutor; + size_t m_ThreadPoolSize; + bool m_ImportInputsIfAligned; + bool m_ReuseBuffers; + std::string m_ComparisonFile; + std::vector m_ComparisonComputeDevices; + bool m_CompareWithTflite; // Ensures that the parameters for ExecuteNetwork fit together void ValidateParams(); diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index ad35092c1d..de7bc051c7 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -1,11 +1,10 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ExecuteNetworkProgramOptions.hpp" #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" -#include "InferenceTest.hpp" #include #include @@ -51,8 +50,6 @@ void CheckOptionDependency(const cxxopts::ParseResult& result, void CheckOptionDependencies(const cxxopts::ParseResult& result) { - CheckOptionDependency(result, "model-path", "model-format"); - CheckOptionDependency(result, "input-tensor-shape", "model-path"); CheckOptionDependency(result, "tuning-level", "tuning-path"); } @@ -119,10 +116,8 @@ void CheckRequiredOptions(const cxxopts::ParseResult& result) // For each option in option-group "a) Required std::vector requiredOptions{"compute", - "model-format", - "model-path", - "input-name", - "output-name"}; + "model-path" + }; bool requiredMissing = false; for(auto const& str : requiredOptions) @@ -141,16 +136,42 @@ void CheckRequiredOptions(const cxxopts::ParseResult& result) void CheckForDeprecatedOptions(const cxxopts::ParseResult& result) { - if(result.count("simultaneous-iterations") > 0) - { - ARMNN_LOG(warning) << "DEPRECATED: The program option 'simultaneous-iterations' is deprecated and will be " - "removed soon. Please use the option 'iterations' combined with 'concurrent' instead."; - } if(result.count("armnn-tflite-delegate") > 0) { ARMNN_LOG(warning) << "DEPRECATED: The program option 'armnn-tflite-delegate' is deprecated and will be " "removed soon. Please use the option 'tflite-executor' instead."; } + if(result.count("concurrent") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'concurrent' is deprecated and will be " + "removed soon. Please use the option '\"P, thread-pool-size\"' instead."; + } + if(result.count("input-type") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'input-type' is deprecated and will be " + "removed soon. The input-types are now automatically set."; + } + if(result.count("input-name") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'input-name' is deprecated and will be " + "removed soon. The input-names are now automatically set."; + } + if(result.count("output-type") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'output-type' is deprecated and will be " + "removed soon. The output-types are now automatically set."; + } + if(result.count("output-name") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'output-name' is deprecated and will be " + "removed soon. The output-names are now automatically set."; + } + if(result.count("model-format") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'model-format' is deprecated and will be " + "removed soon. 
The model-format is now automatically set."; + } + } void ProgramOptions::ValidateExecuteNetworkParams() @@ -182,12 +203,14 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "you can specify a second or third to fall back on. Possible choices: " + armnn::BackendRegistryInstance().GetBackendIdsAsString() + " NOTE: Multiple compute devices need to be passed as a comma separated list without whitespaces " - "e.g. GpuAcc,CpuAcc,CpuRef or by repeating the program option e.g. '-c Cpuacc -c CpuRef'. " + "e.g. GpuAcc,CpuAcc,CpuRef or by repeating the program option e.g. '-c CpuAcc -c CpuRef'. " "Duplicates are ignored.", cxxopts::value>()) ("f,model-format", - "armnn-binary, onnx-binary, onnx-text, tflite-binary", + "armnn-binary, onnx-binary, onnx-text, tflite-binary" + "DEPRECATED: The program option 'model-format' is deprecated and will be " + "removed soon. The model-format is now automatically set.", cxxopts::value()) ("m,model-path", @@ -195,11 +218,13 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value(m_ExNetParams.m_ModelPath)) ("i,input-name", - "Identifier of the input tensors in the network separated by comma.", + "Identifier of the input tensors in the network separated by comma." + "This option is not required, but can be used to set the order of inputs", cxxopts::value()) ("o,output-name", - "Identifier of the output tensors in the network separated by comma.", + "Identifier of the output tensors in the network separated by comma." + "This option is not required, but can be used to set the order of outputs", cxxopts::value()); m_CxxOptions.add_options("b) General") @@ -208,10 +233,16 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If left empty (the default), dynamic backends will not be used.", cxxopts::value(m_RuntimeOptions.m_DynamicBackendsPath)) + ("P, thread-pool-size", + "Run the network using the Arm NN thread pool with the number of threads provided. ", + cxxopts::value(m_ExNetParams.m_ThreadPoolSize)->default_value("0")) + ("n,concurrent", "This option is for Arm NN internal asynchronous testing purposes. " "False by default. If set to true will use std::launch::async or the Arm NN thread pool, " - "if 'thread-pool-size' is greater than 0, for asynchronous execution.", + "if 'thread-pool-size' is greater than 0, for asynchronous execution." + "DEPRECATED: The program option 'concurrent' is deprecated and will be " + "removed soon. Please use the option '\"P, thread-pool-size\"' instead.", cxxopts::value(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true")) ("d,input-tensor-data", @@ -233,9 +264,9 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "still match. This is an Experimental parameter that is incompatible with infer-output-shape. " "This parameter may be removed in a later update. ", cxxopts::value(m_ExNetParams.m_AllowExpandedDims)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) - ("iterations", + ("I,iterations", "Number of iterations to run the network for, default is set to 1. " "If you wish to run the model with different input data for every execution you can do so by " "supplying more input file paths to the 'input-tensor-data' option. 
" @@ -256,7 +287,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("p,print-intermediate-layers", "If this option is enabled, the output of every graph layer will be printed.", cxxopts::value(m_ExNetParams.m_PrintIntermediate)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) ("parse-unsupported", "Add unsupported operators as stand-in layers (where supported by parser)", @@ -272,6 +303,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If unset, default to not quantized. Accepted values (true or false)" " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_QuantizeInput)->default_value("false")->implicit_value("true")) + ("r,threshold-time", "Threshold time is the maximum allowed time for inference measured in milliseconds. If the actual " "inference time is greater than the threshold time, the test will fail. By default, no threshold " @@ -286,7 +318,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("v,visualize-optimized-model", "Enables built optimized model visualizer. If unset, defaults to off.", cxxopts::value(m_ExNetParams.m_EnableLayerDetails)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) ("w,write-outputs-to-file", "Comma-separated list of output file paths keyed with the binding-id of the output slot. " @@ -301,13 +333,17 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("y,input-type", "The type of the input tensors in the network separated by comma. " "If unset, defaults to \"float\" for all defined inputs. " - "Accepted values (float, int, qasymms8 or qasymmu8).", + "Accepted values (float, int, qasymms8 or qasymmu8)." + "DEPRECATED: The program option 'input-type' is deprecated and will be " + "removed soon. The input-types are now automatically set.", cxxopts::value()) ("z,output-type", "The type of the output tensors in the network separated by comma. " "If unset, defaults to \"float\" for all defined outputs. " - "Accepted values (float, int, qasymms8 or qasymmu8).", + "Accepted values (float, int, qasymms8 or qasymmu8)." + "DEPRECATED: The program option 'output-type' is deprecated and will be " + "removed soon. The output-types are now automatically set.", cxxopts::value()) ("T,tflite-executor", @@ -317,30 +353,27 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "tflite is the TfliteInterpreter", cxxopts::value()->default_value("parser")) - ("D,armnn-tflite-delegate", - "Enable Arm NN TfLite delegate. " - "DEPRECATED: This option is deprecated please use tflite-executor instead", - cxxopts::value(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true")) - - ("simultaneous-iterations", - "Number of simultaneous iterations to async-run the network for, default is set to 1 (disabled). " - "When thread-pool-size is set the Arm NN thread pool is used. Otherwise std::launch::async is used." - "DEPRECATED: This option is deprecated and will be removed soon. " - "Please use the option 'iterations' combined with 'concurrent' instead.", - cxxopts::value(m_ExNetParams.m_SimultaneousIterations)->default_value("1")) - - ("thread-pool-size", + ("C, compare-output", "Number of Arm NN threads to use when running the network asynchronously via the Arm NN thread pool. " "The default is set to 0 which equals disabled. 
If 'thread-pool-size' is greater than 0 the " "'concurrent' option is automatically set to true.", - cxxopts::value(m_ExNetParams.m_ThreadPoolSize)->default_value("0")); + cxxopts::value(m_ExNetParams.m_ComparisonFile)) + + ("B, compare-output-with-backend", + "Compare the output of the network with a different backend.", + cxxopts::value>()) + + ("A, compare-with-tflite", + "Compare the output of the network with the tflite ref model.", + cxxopts::value(m_ExNetParams.m_CompareWithTflite)->default_value("false") + ->implicit_value("true")); m_CxxOptions.add_options("c) Optimization") ("bf16-turbo-mode", "If this option is enabled, FP32 layers, " "weights and biases will be converted to BFloat16 where the backend supports it", cxxopts::value(m_ExNetParams.m_EnableBf16TurboMode) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("enable-fast-math", "Enables fast_math options in backends that support it. Using the fast_math flag can lead to " @@ -357,7 +390,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "Enables saving of the cached network to a file given with the cached-network-filepath option. " "See also --cached-network-filepath", cxxopts::value(m_ExNetParams.m_SaveCachedNetwork) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("cached-network-filepath", "If non-empty, the given file will be used to load/save the cached network. " @@ -371,7 +404,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If this option is enabled, FP32 layers, " "weights and biases will be converted to FP16 where the backend supports it", cxxopts::value(m_ExNetParams.m_EnableFp16TurboMode) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("tuning-level", "Sets the tuning level which enables a tuning run which will update/create a tuning file. " @@ -384,12 +417,12 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value(m_ExNetParams.m_TuningPath)) ("MLGOTuningFilePath", - "Path to tuning file. Enables use of CL MLGO tuning", - cxxopts::value(m_ExNetParams.m_MLGOTuningFilePath)) + "Path to tuning file. 
Enables use of CL MLGO tuning", + cxxopts::value(m_ExNetParams.m_MLGOTuningFilePath)) ("R, reuse-buffers", - "If enabled then the IO buffers will be reused for each inference", - cxxopts::value(m_ExNetParams.m_ReuseBuffers)->default_value("false")->implicit_value("true")); + "If enabled then the IO buffers will be reused for each inference", + cxxopts::value(m_ExNetParams.m_ReuseBuffers)->default_value("false")->implicit_value("true")); m_CxxOptions.add_options("d) Profiling") ("a,enable-external-profiling", @@ -404,7 +437,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("g,file-only-external-profiling", "If enabled then the 'file-only' test mode of external profiling will be enabled", cxxopts::value(m_RuntimeOptions.m_ProfilingOptions.m_FileOnly) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("file-format", "If profiling is enabled specifies the output file format", @@ -421,7 +454,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("timeline-profiling", "If enabled timeline profiling will be switched on, requires external profiling", cxxopts::value(m_RuntimeOptions.m_ProfilingOptions.m_TimelineEnabled) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("u,counter-capture-period", "If profiling is enabled in 'file-only' mode this is the capture period that will be used in the test", @@ -430,11 +463,12 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("output-network-details", "Outputs layer tensor infos and descriptors to std out along with profiling events. Defaults to off.", cxxopts::value(m_ExNetParams.m_OutputDetailsToStdOut)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) + ("output-network-details-only", "Outputs layer tensor infos and descriptors to std out without profiling events. Defaults to off.", cxxopts::value(m_ExNetParams.m_OutputDetailsOnlyToStdOut)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) ("import-inputs-if-aligned", "In & Out tensors will be imported per inference if the memory alignment allows. 
Defaults to false.", @@ -469,25 +503,25 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) CheckOptionDependencies(m_CxxResult); CheckForDeprecatedOptions(m_CxxResult); + if ((m_ExNetParams.m_OutputDetailsToStdOut || + m_ExNetParams.m_OutputDetailsOnlyToStdOut) && + !m_ExNetParams.m_EnableProfiling) + { + throw cxxopts::OptionParseException("You must enable profiling if you would like to output layer details"); + } + // Some options can't be assigned directly because they need some post-processing: auto computeDevices = GetOptionValue>("compute", m_CxxResult); m_ExNetParams.m_ComputeDevices = GetBackendIDs(computeDevices); - m_ExNetParams.m_ModelFormat = - armnn::stringUtils::StringTrimCopy(GetOptionValue("model-format", m_CxxResult)); m_ExNetParams.m_InputNames = ParseStringList(GetOptionValue("input-name", m_CxxResult), ","); m_ExNetParams.m_InputTensorDataFilePaths = ParseStringList(GetOptionValue("input-tensor-data", m_CxxResult), ","); m_ExNetParams.m_OutputNames = ParseStringList(GetOptionValue("output-name", m_CxxResult), ","); - m_ExNetParams.m_InputTypes = - ParseStringList(GetOptionValue("input-type", m_CxxResult), ","); - m_ExNetParams.m_OutputTypes = - ParseStringList(GetOptionValue("output-type", m_CxxResult), ","); m_ExNetParams.m_OutputTensorFiles = ParseStringList(GetOptionValue("write-outputs-to-file", m_CxxResult), ","); - m_ExNetParams.m_GenerateTensorData = - m_ExNetParams.m_InputTensorDataFilePaths.empty(); + m_ExNetParams.m_GenerateTensorData = m_ExNetParams.m_InputTensorDataFilePaths.empty(); m_ExNetParams.m_DynamicBackendsPath = m_RuntimeOptions.m_DynamicBackendsPath; m_RuntimeOptions.m_EnableGpuProfiling = m_ExNetParams.m_EnableProfiling; @@ -517,13 +551,13 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) { m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate; } - if (m_ExNetParams.m_SimultaneousIterations > 1) + + // Set concurrent to true if the user expects to run inferences asynchronously + if (m_ExNetParams.m_Concurrent) { - m_ExNetParams.m_Iterations = m_ExNetParams.m_SimultaneousIterations; - m_ExNetParams.m_Concurrent = true; + m_ExNetParams.m_ThreadPoolSize = 1; } - // Set concurrent to true if the user expects to run inferences asynchronously if (m_ExNetParams.m_ThreadPoolSize > 0) { m_ExNetParams.m_Concurrent = true; @@ -543,7 +577,7 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) std::vector dims = ParseArray(ss); m_ExNetParams.m_InputTensorShapes.push_back( - std::make_unique(static_cast(dims.size()), dims.data())); + armnn::TensorShape{static_cast(dims.size()), dims.data()}); } } @@ -568,5 +602,12 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) } ValidateRuntimeOptions(); + + auto comparisonComputDevices = GetOptionValue>("compare-output-with-backend", m_CxxResult); + + if (!comparisonComputDevices.empty()) + { + m_ExNetParams.m_ComparisonComputeDevices = GetBackendIDs(comparisonComputDevices); + } } diff --git a/tests/ExecuteNetwork/IExecutor.hpp b/tests/ExecuteNetwork/IExecutor.hpp new file mode 100644 index 0000000000..4ed6cbde84 --- /dev/null +++ b/tests/ExecuteNetwork/IExecutor.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once +#include + +/// IExecutor executes a network +class IExecutor +{ +public: + /// Execute the given network + /// @return std::vector A type erased vector of the outputs, + /// that can be compared with the output of another IExecutor + virtual std::vector Execute() = 0; + /// Print available information about the network + virtual void PrintNetworkInfo() = 0; + /// Compare the output with the result of another IExecutor + virtual void CompareAndPrintResult(std::vector otherOutput) = 0; + virtual ~IExecutor(){}; +}; diff --git a/tests/ExecuteNetwork/TfliteExecutor.cpp b/tests/ExecuteNetwork/TfliteExecutor.cpp new file mode 100644 index 0000000000..dc495be5c3 --- /dev/null +++ b/tests/ExecuteNetwork/TfliteExecutor.cpp @@ -0,0 +1,251 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "TfliteExecutor.hpp" + +TfLiteExecutor::TfLiteExecutor(const ExecuteNetworkParams& params) : m_Params(params) +{ + std::unique_ptr model = + tflite::FlatBufferModel::BuildFromFile(m_Params.m_ModelPath.c_str()); + + m_TfLiteInterpreter = std::make_unique(); + tflite::ops::builtin::BuiltinOpResolver resolver; + + tflite::InterpreterBuilder builder(*model, resolver); + builder(&m_TfLiteInterpreter); + m_TfLiteInterpreter->AllocateTensors(); + + int status = kTfLiteError; + if (m_Params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) + { + // Create the Armnn Delegate + // Populate a DelegateOptions from the ExecuteNetworkParams. + armnnDelegate::DelegateOptions delegateOptions = m_Params.ToDelegateOptions(); + delegateOptions.SetExternalProfilingParams(delegateOptions.GetExternalProfilingParams()); + + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + // Register armnn_delegate to TfLiteInterpreter + status = m_TfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); + if (status == kTfLiteError) + { + LogAndThrow("Could not register ArmNN TfLite Delegate to TfLiteInterpreter"); + } + } + else + { + std::cout << "Running on TfLite without ArmNN delegate\n"; + } + + armnn::Optional dataFile = m_Params.m_GenerateTensorData + ? 
armnn::EmptyOptional() + : armnn::MakeOptional(m_Params.m_InputTensorDataFilePaths[0]); + + const size_t numInputs = m_Params.m_InputNames.size(); + + for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) + { + int input = m_TfLiteInterpreter->inputs()[inputIndex]; + + TfLiteIntArray* inputDims = m_TfLiteInterpreter->tensor(input)->dims; + + unsigned int inputSize = 1; + for (unsigned int dim = 0; dim < static_cast(inputDims->size); ++dim) + { + inputSize *= inputDims->data[dim]; + } + + const auto& inputName = m_TfLiteInterpreter->input_tensor(input)->name; + const auto& dataType = m_TfLiteInterpreter->input_tensor(input)->type; + + switch (dataType) + { + case kTfLiteFloat32: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteInt32: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteUInt8: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteInt16: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteInt8: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + default: + { + LogAndThrow("Unsupported input tensor data type"); + } + } + } +} + +std::vector TfLiteExecutor::Execute() +{ + int status = 0; + std::vector results; + for (size_t x = 0; x < m_Params.m_Iterations; x++) + { + // Start timer to record inference time in milliseconds. + const auto start_time = armnn::GetTimeNow(); + // Run the inference + status = m_TfLiteInterpreter->Invoke(); + const auto duration = armnn::GetTimeDuration(start_time); + + if (m_Params.m_DontPrintOutputs || m_Params.m_ReuseBuffers) + { + break; + } + // Print out the output + for (unsigned int outputIndex = 0; outputIndex < m_TfLiteInterpreter->outputs().size(); ++outputIndex) + { + auto tfLiteDelegateOutputId = m_TfLiteInterpreter->outputs()[outputIndex]; + TfLiteIntArray* outputDims = m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; + // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. + FILE* outputTensorFile = stdout; + if (!m_Params.m_OutputTensorFiles.empty()) + { + outputTensorFile = fopen(m_Params.m_OutputTensorFiles[outputIndex].c_str(), "w"); + if (outputTensorFile == NULL) + { + LogAndThrow("Specified output tensor file, \"" + m_Params.m_OutputTensorFiles[outputIndex] + + "\", cannot be created. Defaulting to stdout. 
Error was: " + std::strerror(errno)); + } + else + { + ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" + << m_Params.m_OutputTensorFiles[outputIndex] << "'"; + } + } + long outputSize = 1; + for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) + { + outputSize *= outputDims->data[dim]; + } + + std::cout << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name << ": "; + results.push_back(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation); + + switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type) + { + + case kTfLiteFloat32: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); + } + break; + } + case kTfLiteInt32: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); + } + break; + } + case kTfLiteUInt8: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); + } + break; + } + case kTfLiteInt8: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); + } + break; + } + default: + { + LogAndThrow("Unsupported output type"); + } + } + + std::cout << std::endl; + } + CheckInferenceTimeThreshold(duration, m_Params.m_ThresholdTime); + } + + std::cout << status; + return results; +} + +void TfLiteExecutor::CompareAndPrintResult(std::vector otherOutput) +{ + for (unsigned int outputIndex = 0; outputIndex < m_TfLiteInterpreter->outputs().size(); ++outputIndex) + { + auto tfLiteDelegateOutputId = m_TfLiteInterpreter->outputs()[outputIndex]; + float result = 0; + switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type) + { + case kTfLiteFloat32: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + + break; + } + case kTfLiteInt32: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + break; + } + case kTfLiteUInt8: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + break; + } + case kTfLiteInt8: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + break; + } + default: + { + } + } + + std::cout << "RMSE of " + << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name + << ": " << result << std::endl; + } +}; diff --git a/tests/ExecuteNetwork/TfliteExecutor.hpp b/tests/ExecuteNetwork/TfliteExecutor.hpp new file mode 100644 index 0000000000..623d6357eb --- /dev/null +++ b/tests/ExecuteNetwork/TfliteExecutor.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "IExecutor.hpp" +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" +#include "ExecuteNetworkProgramOptions.hpp" +#include "armnn/utility/NumericCast.hpp" +#include "armnn/utility/Timer.hpp" + +#include +#include + +#include +#include +#include + +using namespace tflite; +class TfLiteExecutor : public IExecutor +{ +public: + TfLiteExecutor(const ExecuteNetworkParams& m_Params); + + std::vector Execute() override; + void PrintNetworkInfo() override{}; + void CompareAndPrintResult(std::vector otherOutput) override; + +private: + std::unique_ptr m_Model; + const ExecuteNetworkParams& m_Params; + std::unique_ptr m_TfLiteInterpreter; +}; + diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp index 93716e1a6f..268f60301c 100644 --- a/tests/InferenceModel.hpp +++ b/tests/InferenceModel.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -15,6 +15,7 @@ #include #include +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" #include @@ -46,40 +47,6 @@ #include #include -namespace -{ - -inline bool CheckRequestedBackendsAreValid(const std::vector& backendIds, - armnn::Optional invalidBackendIds = armnn::EmptyOptional()) -{ - if (backendIds.empty()) - { - return false; - } - - armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds(); - - bool allValid = true; - for (const auto& backendId : backendIds) - { - if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end()) - { - allValid = false; - if (invalidBackendIds) - { - if (!invalidBackendIds.value().empty()) - { - invalidBackendIds.value() += ", "; - } - invalidBackendIds.value() += backendId; - } - } - } - return allValid; -} - -} // anonymous namespace - namespace InferenceModelInternal { using BindingPointInfo = armnn::BindingPointInfo; diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp index 6c74aaa6ed..e3c95d9312 100644 --- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp +++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp @@ -1,110 +1,12 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #include "NetworkExecutionUtils.hpp" #include -#include -#include - -#if defined(ARMNN_SERIALIZER) -#include "armnnDeserializer/IDeserializer.hpp" -#endif -#if defined(ARMNN_TF_LITE_PARSER) -#include "armnnTfLiteParser/ITfLiteParser.hpp" -#endif -#if defined(ARMNN_ONNX_PARSER) -#include "armnnOnnxParser/IOnnxParser.hpp" -#endif - -template -auto ParseDataArray(std::istream& stream); - -template -auto ParseDataArray(std::istream& stream, - const float& quantizationScale, - const int32_t& quantizationOffset); - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, [](const std::string& s) { return std::stof(s); }); -} - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, [](const std::string& s) { return std::stoi(s); }); -} - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - -template<> -auto ParseDataArray(std::istream& stream, - const float& quantizationScale, - const int32_t& quantizationOffset) -{ - return ParseArrayImpl(stream, - [&quantizationScale, &quantizationOffset](const std::string& s) - { - return armnn::numeric_cast( - armnn::Quantize(std::stof(s), - quantizationScale, - quantizationOffset)); - }); -} - -template<> -auto ParseDataArray(std::istream& stream, - const float& quantizationScale, - const int32_t& quantizationOffset) -{ - return ParseArrayImpl(stream, - [&quantizationScale, &quantizationOffset](const std::string& s) - { - return armnn::numeric_cast( - armnn::Quantize(std::stof(s), - quantizationScale, - quantizationOffset)); - }); -} - -template> -std::vector GenerateDummyTensorData(unsigned int numElements) -{ - return std::vector(numElements, static_cast(0)); -} - - -std::vector ParseArray(std::istream& stream) -{ - return ParseArrayImpl( - stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - +#include std::vector ParseStringList(const std::string& inputString, const char* delimiter) { std::stringstream stream(inputString); @@ -112,189 +14,27 @@ std::vector ParseStringList(const std::string& inputString, const c return armnn::stringUtils::StringTrimCopy(s); }, delimiter); } - -TensorPrinter::TensorPrinter(const std::string& binding, - const armnn::TensorInfo& info, - const std::string& outputTensorFile, - bool dequantizeOutput, - const bool printToConsole) - : m_OutputBinding(binding) - , m_Scale(info.GetQuantizationScale()) - , m_Offset(info.GetQuantizationOffset()) - , m_OutputTensorFile(outputTensorFile) - , m_DequantizeOutput(dequantizeOutput) - , m_PrintToConsole(printToConsole) {} - -void TensorPrinter::operator()(const std::vector& values) -{ - if (m_PrintToConsole) - { - std::cout << m_OutputBinding << ": "; - ForEachValue(values, [](float value) - { - printf("%f ", value); - }); - printf("\n"); - } - WriteToFile(values); -} - -void TensorPrinter::operator()(const std::vector& values) +bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, + const double& thresholdTime) { - if(m_DequantizeOutput) + ARMNN_LOG(info) << "Inference time: " << 
std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (thresholdTime != 0.0) { - auto& scale = m_Scale; - auto& offset = m_Offset; - std::vector dequantizedValues; - ForEachValue(values, [&scale, &offset, &dequantizedValues](uint8_t value) - { - auto dequantizedValue = armnn::Dequantize(value, scale, offset); - dequantizedValues.push_back(dequantizedValue); - }); - - if (m_PrintToConsole) + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << thresholdTime << " ms"; + auto thresholdMinusInference = thresholdTime - duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + if (thresholdMinusInference < 0) { - std::cout << m_OutputBinding << ": "; - ForEachValue(dequantizedValues, [](float value) - { - printf("%f ", value); - }); - printf("\n"); + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + return false; } - - WriteToFile(dequantizedValues); - } - else - { - const std::vector intValues(values.begin(), values.end()); - operator()(intValues); } -} - -void TensorPrinter::operator()(const std::vector& values) -{ - if (m_PrintToConsole) - { - std::cout << m_OutputBinding << ": "; - ForEachValue(values, [](int8_t value) - { - printf("%d ", value); - }); - printf("\n"); - } - WriteToFile(values); -} - -void TensorPrinter::operator()(const std::vector& values) -{ - if (m_PrintToConsole) - { - std::cout << m_OutputBinding << ": "; - ForEachValue(values, [](int value) - { - printf("%d ", value); - }); - printf("\n"); - } - WriteToFile(values); -} - -template -void TensorPrinter::ForEachValue(const Container& c, Delegate delegate) -{ - for (const auto& value : c) - { - delegate(value); - } -} - -template -void TensorPrinter::WriteToFile(const std::vector& values) -{ - if (!m_OutputTensorFile.empty()) - { - std::ofstream outputTensorFile; - outputTensorFile.open(m_OutputTensorFile, std::ofstream::out | std::ofstream::trunc); - if (outputTensorFile.is_open()) - { - outputTensorFile << m_OutputBinding << ": "; - std::copy(values.begin(), values.end(), std::ostream_iterator(outputTensorFile, " ")); - } - else - { - ARMNN_LOG(info) << "Output Tensor File: " << m_OutputTensorFile << " could not be opened!"; - } - outputTensorFile.close(); - } -} - -void PopulateTensorWithData(armnnUtils::TContainer& tensorData, - unsigned int numElements, - const std::string& dataTypeStr, - const armnn::Optional& qParams, - const armnn::Optional& dataFile) -{ - const bool readFromFile = dataFile.has_value() && !dataFile.value().empty(); - const bool quantizeData = qParams.has_value(); - - std::ifstream inputTensorFile; - if (readFromFile) - { - inputTensorFile = std::ifstream(dataFile.value()); - } - - if (dataTypeStr.compare("float") == 0) - { - if (quantizeData) - { - const float qScale = qParams.value().first; - const int qOffset = qParams.value().second; - - tensorData = readFromFile ? - ParseDataArray(inputTensorFile, qScale, qOffset) : - GenerateDummyTensorData(numElements); - } - else - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - } - else if (dataTypeStr.compare("int") == 0) - { - tensorData = readFromFile ? 
- ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else if (dataTypeStr.compare("qsymms8") == 0) - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else if (dataTypeStr.compare("qasymm8") == 0 || dataTypeStr.compare("qasymmu8") == 0) - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else if (dataTypeStr.compare("qasymms8") == 0) - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else - { - std::string errorMessage = "Unsupported tensor data type " + dataTypeStr; - ARMNN_LOG(fatal) << errorMessage; - - inputTensorFile.close(); - throw armnn::Exception(errorMessage); - } - - inputTensorFile.close(); + return true; } bool ValidatePath(const std::string& file, const bool expectFile) @@ -312,6 +52,13 @@ bool ValidatePath(const std::string& file, const bool expectFile) return true; } +std::vector ParseArray(std::istream& stream) +{ + return ParseArrayImpl( + stream, + [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); +} + bool ValidatePaths(const std::vector& fileVec, const bool expectFile) { bool allPathsValid = true; @@ -325,5 +72,9 @@ bool ValidatePaths(const std::vector& fileVec, const bool expectFil return allPathsValid; } - +void LogAndThrow(std::string eMsg) +{ + ARMNN_LOG(error) << eMsg; + throw armnn::Exception(eMsg); +} diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp index bc2868ab35..14d7fe5551 100644 --- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp +++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp @@ -1,63 +1,83 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once -#include -#include #include #include -#include +#include +#include #include #include +#include +#include +/** + * Given a measured duration and a threshold time tell the user whether we succeeded or not. + * + * @param duration the measured inference duration. + * @param thresholdTime the threshold time in milliseconds. + * @return false if the measured time exceeded the threshold. 
+ */ +bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, + const double& thresholdTime); -std::vector ParseArray(std::istream& stream); - -/// Splits a given string at every accurance of delimiter into a vector of string -std::vector ParseStringList(const std::string& inputString, const char* delimiter); - -struct TensorPrinter +inline bool CheckRequestedBackendsAreValid(const std::vector& backendIds, + armnn::Optional invalidBackendIds = armnn::EmptyOptional()) { - TensorPrinter(const std::string& binding, - const armnn::TensorInfo& info, - const std::string& outputTensorFile, - bool dequantizeOutput, - bool printToConsole = true); - - void operator()(const std::vector& values); - - void operator()(const std::vector& values); - - void operator()(const std::vector& values); + if (backendIds.empty()) + { + return false; + } - void operator()(const std::vector& values); + armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds(); -private: - template - void ForEachValue(const Container& c, Delegate delegate); + bool allValid = true; + for (const auto& backendId : backendIds) + { + if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end()) + { + allValid = false; + if (invalidBackendIds) + { + if (!invalidBackendIds.value().empty()) + { + invalidBackendIds.value() += ", "; + } + invalidBackendIds.value() += backendId; + } + } + } + return allValid; +} - template - void WriteToFile(const std::vector& values); +std::vector ParseArray(std::istream& stream); - std::string m_OutputBinding; - float m_Scale; - int m_Offset; - std::string m_OutputTensorFile; - bool m_DequantizeOutput; - bool m_PrintToConsole; -}; +/// Splits a given string at every accurance of delimiter into a vector of string +std::vector ParseStringList(const std::string& inputString, const char* delimiter); -using QuantizationParams = std::pair; +/// Dequantize an array of a given type +/// @param array Type erased array to dequantize +/// @param numElements Elements in the array +/// @param array Type erased array to dequantize +template +std::vector DequantizeArray(const void* array, unsigned int numElements, float scale, int32_t offset) +{ + const T* quantizedArray = reinterpret_cast(array); + std::vector dequantizedVector; + dequantizedVector.reserve(numElements); + for (unsigned int i = 0; i < numElements; ++i) + { + float f = armnn::Dequantize(*(quantizedArray + i), scale, offset); + dequantizedVector.push_back(f); + } + return dequantizedVector; +} -void PopulateTensorWithData(armnnUtils::TContainer& tensorData, - unsigned int numElements, - const std::string& dataTypeStr, - const armnn::Optional& qParams, - const armnn::Optional& dataFile); +void LogAndThrow(std::string eMsg); /** * Verifies if the given string is a valid path. Reports invalid paths to std::err. 
@@ -75,6 +95,152 @@ bool ValidatePath(const std::string& file, const bool expectFile); * */ bool ValidatePaths(const std::vector& fileVec, const bool expectFile); +/// Returns a function of read the given type as a string +template ::value>* = nullptr> +std::function GetParseElementFunc() +{ + return [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }; +} + +template ::value>* = nullptr> +std::function GetParseElementFunc() +{ + return [](const std::string& s) { return std::stof(s); }; +} + +template +void PopulateTensorWithData(T* tensor, + const unsigned int numElements, + const armnn::Optional& dataFile, + const std::string& inputName) +{ + const bool readFromFile = dataFile.has_value() && !dataFile.value().empty(); + + std::ifstream inputTensorFile; + if (!readFromFile) + { + std::fill(tensor, tensor + numElements, 0); + return; + } + else + { + inputTensorFile = std::ifstream(dataFile.value()); + } + + auto parseElementFunc = GetParseElementFunc(); + std::string line; + unsigned int index = 0; + while (std::getline(inputTensorFile, line)) + { + std::vector tokens = armnn::stringUtils::StringTokenizer(line, "\t ,:"); + for (const std::string& token : tokens) + { + if (!token.empty()) // See https://stackoverflow.com/questions/10437406/ + { + try + { + if (index == numElements) + { + ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << dataFile.value() + << "\" does not match number of elements: " << numElements + << " for input \"" << inputName << "\"."; + } + *(tensor + index) = parseElementFunc(token); + index++; + } + catch (const std::exception&) + { + ARMNN_LOG(error) << "'" << token << "' is not a valid number. It has been ignored."; + } + } + } + } + + if (index != numElements) + { + ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << inputName + << "\" does not match number of elements: " << numElements + << " for input \"" << inputName << "\"."; + } +} + +template +void WriteToFile(const std::string& outputTensorFileName, + const std::string& outputName, + const T* const array, + const unsigned int numElements) +{ + std::ofstream outputTensorFile; + outputTensorFile.open(outputTensorFileName, std::ofstream::out | std::ofstream::trunc); + if (outputTensorFile.is_open()) + { + outputTensorFile << outputName << ": "; + std::copy(array, array + numElements, std::ostream_iterator(outputTensorFile, " ")); + } + else + { + ARMNN_LOG(info) << "Output Tensor File: " << outputTensorFileName << " could not be opened!"; + } + outputTensorFile.close(); +} + +struct OutputWriteInfo +{ + const armnn::Optional& m_OutputTensorFile; + const std::string& m_OutputName; + const armnn::Tensor& m_Tensor; + const bool m_PrintTensor; +}; + +template +void PrintTensor(OutputWriteInfo& info, const char* formatString) +{ + const T* array = reinterpret_cast(info.m_Tensor.GetMemoryArea()); + + if (info.m_OutputTensorFile.has_value()) + { + WriteToFile(info.m_OutputTensorFile.value(), + info.m_OutputName, + array, + info.m_Tensor.GetNumElements()); + } + + if (info.m_PrintTensor) + { + for (unsigned int i = 0; i < info.m_Tensor.GetNumElements(); i++) + { + printf(formatString, array[i]); + } + } +} + +template +void PrintQuantizedTensor(OutputWriteInfo& info) +{ + std::vector dequantizedValues; + auto tensor = info.m_Tensor; + dequantizedValues = DequantizeArray(tensor.GetMemoryArea(), + tensor.GetNumElements(), + tensor.GetInfo().GetQuantizationScale(), + tensor.GetInfo().GetQuantizationOffset()); + + if 
(info.m_OutputTensorFile.has_value())
+    {
+        WriteToFile(info.m_OutputTensorFile.value(),
+                    info.m_OutputName,
+                    dequantizedValues.data(),
+                    tensor.GetNumElements());
+    }
+
+    if (info.m_PrintTensor)
+    {
+        std::for_each(dequantizedValues.begin(), dequantizedValues.end(), [&](float value)
+        {
+            printf("%f ", value);
+        });
+    }
+}
+
 template<typename T, typename TParseElementFunc>
 std::vector<T> ParseArrayImpl(std::istream& stream, TParseElementFunc parseElementFunc, const char* chars = "\t ,:")
 {
@@ -103,21 +269,28 @@ std::vector<T> ParseArrayImpl(std::istream& stream, TParseElementFunc parseEleme
     return result;
 }
 
-template <typename T, typename TParseElementFunc>
-void PopulateTensorWithDataGeneric(std::vector<T>& tensorData,
-                                   unsigned int numElements,
-                                   const armnn::Optional<std::string>& dataFile,
-                                   TParseElementFunc parseFunction)
+/// Compute the root-mean-square error (RMSE) between two tensors of the same type and size
+/// @param expected pointer to the expected (reference) output data
+/// @param actual pointer to the actual output data
+/// @param size size of the tensors in bytes
+/// @return the RMSE as a float
+template <typename T>
+float ComputeRMSE(const void* expected, const void* actual, const size_t size)
 {
-    const bool readFromFile = dataFile.has_value() && !dataFile.value().empty();
+    auto typedExpected = reinterpret_cast<const T*>(expected);
+    auto typedActual = reinterpret_cast<const T*>(actual);
 
-    std::ifstream inputTensorFile;
-    if (readFromFile)
+    T errorSum = 0;
+
+    for (unsigned int i = 0; i < size / sizeof(T); i++)
     {
-        inputTensorFile = std::ifstream(dataFile.value());
+        errorSum += std::pow(std::abs(typedExpected[i] - typedActual[i]), 2);
     }
-    tensorData = readFromFile ?
-                 ParseArrayImpl<T>(inputTensorFile, parseFunction) :
-                 std::vector<T>(numElements, static_cast<T>(0));
-}
+
+    float rmse = std::sqrt(armnn::numeric_cast<float>(errorSum) / armnn::numeric_cast<float>(size / sizeof(T)));
+    return rmse;
+}
\ No newline at end of file
-- 
cgit v1.2.1
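
Editor's note (illustrative only, not part of the patch): the sketch below shows how the refactored helpers declared in tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp might be called by an executor. The wrapper function CheckOutputs and the 20 ms threshold are hypothetical; only ComputeRMSE and CheckInferenceTimeThreshold come from the patch, and ComputeRMSE is assumed to take the buffer size in bytes, consistent with its size / sizeof(T) divisor.

// Illustrative usage sketch - assumes NetworkExecutionUtils.hpp from this patch is included.
#include <chrono>
#include <iostream>
#include <vector>

void CheckOutputs(const std::vector<float>& expected,                       // hypothetical helper
                  const std::vector<float>& actual,
                  std::chrono::duration<double, std::milli> inferenceTime)
{
    // ComputeRMSE is templated on the element type; the third argument is the size in bytes.
    float rmse = ComputeRMSE<float>(expected.data(),
                                    actual.data(),
                                    expected.size() * sizeof(float));
    std::cout << "RMSE: " << rmse << std::endl;

    // A threshold of 0.0 means "no threshold supplied"; a non-zero value can fail the run.
    if (!CheckInferenceTimeThreshold(inferenceTime, 20.0))                   // 20 ms is a made-up threshold
    {
        std::cout << "Inference exceeded the example threshold." << std::endl;
    }
}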