aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--samples/ImageClassification/requirements.txt4
-rwxr-xr-xscripts/get_compute_library.sh4
-rw-r--r--src/armnn/layers/SplitterLayer.cpp21
-rw-r--r--src/backends/aclCommon/ArmComputeUtils.hpp26
-rw-r--r--src/backends/backendsCommon/WorkloadUtils.cpp28
-rw-r--r--src/backends/backendsCommon/WorkloadUtils.hpp9
-rw-r--r--src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp81
-rw-r--r--src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp22
-rw-r--r--src/backends/cl/ClLayerSupport.cpp3
-rw-r--r--src/backends/cl/workloads/ClSplitterWorkload.cpp3
-rw-r--r--src/backends/neon/NeonLayerSupport.cpp1
-rw-r--r--src/backends/neon/workloads/NeonSplitterWorkload.cpp3
-rw-r--r--src/backends/reference/test/RefEndToEndTests.cpp13
-rw-r--r--src/backends/tosaCommon/TosaMappings.cpp2
-rw-r--r--src/backends/tosaCommon/TosaMappings.hpp2
-rw-r--r--src/backends/tosaCommon/operatorMappings/ActivationOperator.cpp58
-rw-r--r--src/backends/tosaCommon/operatorMappings/AvgPool2DIgnoreValueOperator.cpp14
-rw-r--r--src/backends/tosaCommon/operatorMappings/ConcatOperator.cpp13
-rw-r--r--src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp87
-rw-r--r--src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp20
-rw-r--r--src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.cpp12
-rw-r--r--src/backends/tosaCommon/operatorMappings/Pooling2DOperator.cpp14
-rw-r--r--src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp196
-rw-r--r--src/backends/tosaCommon/operatorMappings/ReshapeOperator.cpp14
-rw-r--r--src/backends/tosaCommon/operatorMappings/ResizeOperator.cpp14
-rw-r--r--src/backends/tosaCommon/operatorMappings/SliceOperator.cpp14
-rw-r--r--src/backends/tosaCommon/operatorMappings/SplitOperator.cpp43
-rw-r--r--src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp50
-rw-r--r--src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp229
-rw-r--r--src/backends/tosaCommon/operatorMappings/TransposeConv2dOperator.cpp12
-rw-r--r--src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp14
-rw-r--r--src/backends/tosaCommon/test/AvgPool2DIgnoreValueChecker.hpp4
-rw-r--r--src/backends/tosaCommon/test/OneToManyMappingTests.cpp22
-rw-r--r--src/backends/tosaCommon/test/OneToOneMappingTests.cpp6
-rw-r--r--src/backends/tosaCommon/test/SplitChecker.hpp4
-rw-r--r--src/backends/tosaCommon/test/TosaTestUtils.hpp8
-rw-r--r--src/backends/tosaReference/test/TosaRefEndToEndTests.cpp34
-rw-r--r--src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp4
-rw-r--r--tests/ExecuteNetwork/ArmNNExecutor.cpp142
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkParams.hpp3
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp6
41 files changed, 800 insertions, 459 deletions
diff --git a/samples/ImageClassification/requirements.txt b/samples/ImageClassification/requirements.txt
index f6c35b6bc0..66dcfb8d56 100644
--- a/samples/ImageClassification/requirements.txt
+++ b/samples/ImageClassification/requirements.txt
@@ -1,7 +1,7 @@
#
-# Copyright © 2021-2022 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2021-2024 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
numpy==1.22.0
-Pillow==10.0.1
+Pillow==10.2.0
pybind11==2.6.2
diff --git a/scripts/get_compute_library.sh b/scripts/get_compute_library.sh
index 63c09629a8..6170b49a50 100755
--- a/scripts/get_compute_library.sh
+++ b/scripts/get_compute_library.sh
@@ -7,10 +7,10 @@
CMD=$( basename "$0" )
# For pinning to a ref use this:
-DEFAULT_CLFRAMEWORKREVISION="branches/arm_compute_24_04" # Release 24.04
+#DEFAULT_CLFRAMEWORKREVISION="branches/arm_compute_24_04" # Release 24.04
#
# For pinning to a revision use this:
-#DEFAULT_CLFRAMEWORKREVISION="1322065a3fbd15b00dbfb0969d6b438b5ba15530" #11399: Specify absolute tolerance
+DEFAULT_CLFRAMEWORKREVISION="0c5ba9ee5ad36aeb6d684c20d473a27f482e83fb" #11421: Change reorder implementation to be vector length agnostic for OHWIo8 reorder
usage() {
echo -e "get_compute_library.sh: Clones the Arm Compute Library (ACL) repo from the ML Platform server and checks out
diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp
index 8a24e0df1f..b04614b31b 100644
--- a/src/armnn/layers/SplitterLayer.cpp
+++ b/src/armnn/layers/SplitterLayer.cpp
@@ -9,6 +9,7 @@
#include <armnn/TypesUtils.hpp>
#include <armnn/backends/WorkloadData.hpp>
#include <armnn/backends/WorkloadFactory.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
namespace armnn
{
@@ -57,26 +58,6 @@ void SplitterLayer::CreateTensors(const TensorHandleFactoryRegistry& registry,
// check if split is along the x or y (2 innermost dimensions)
auto numberOfDimensions = m_Param.GetNumDimensions();
- // Compute split axis within class as aclCommon function causes header issues when included
- auto ComputeSplitAxis = [&](const armnn::SplitterDescriptor& desc, const TensorShape& input)
- {
- unsigned int numSplit = desc.GetNumViews();
- unsigned int numDimensions = desc.GetNumDimensions();
- std::set<unsigned int> splitAxis;
-
- for (unsigned int i = 0; i < numSplit; ++i)
- {
- for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
- {
- if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
- {
- splitAxis.insert(dimIdx);
- }
- }
- }
- return splitAxis;
- };
-
std::set<unsigned int> axis = ComputeSplitAxis(m_Param, parentInfo.GetShape());
std::set<unsigned int>::iterator axisIt = axis.begin();
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index d7025aa5e2..fc77f810ee 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -242,32 +242,6 @@ inline T ComputeSoftmaxAclAxis(const SoftmaxDescriptor& softmaxDesc, const armnn
return aclAxis;
}
-inline std::set<unsigned int> ComputeSplitAxis(const armnn::SplitterDescriptor& desc, const TensorShape& input)
-{
- unsigned int numSplit = desc.GetNumViews();
- unsigned int numDimensions = desc.GetNumDimensions();
- std::set<unsigned int> splitAxis;
-
- if (desc.HasAxis())
- {
- splitAxis.insert(armnnUtils::GetUnsignedAxis(desc.GetNumDimensions(), desc.GetAxis()));
- }
- else
- {
- for (unsigned int i = 0; i < numSplit; ++i)
- {
- for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
- {
- if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
- {
- splitAxis.insert(dimIdx);
- }
- }
- }
- }
- return splitAxis;
-}
-
/// Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)
inline int ComputeAclAxis(const int& armnnAxis, const armnn::TensorInfo& tensor)
{
diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp
index e36c4b2128..d459820dde 100644
--- a/src/backends/backendsCommon/WorkloadUtils.cpp
+++ b/src/backends/backendsCommon/WorkloadUtils.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017-2023 Arm Ltd. All rights reserved.
+// Copyright © 2017-2024 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,6 +8,7 @@
#include <armnn/Utils.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>
+#include <armnnUtils/TensorUtils.hpp>
#include <fmt/format.h>
#include <numeric>
@@ -373,4 +374,29 @@ armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned i
return permutationVector;
}
+std::set<unsigned int> ComputeSplitAxis(const armnn::SplitterDescriptor& desc, const TensorShape& input)
+{
+ unsigned int numSplit = desc.GetNumViews();
+ unsigned int numDimensions = desc.GetNumDimensions();
+ std::set<unsigned int> splitAxis;
+ if (desc.HasAxis())
+ {
+ splitAxis.insert(armnnUtils::GetUnsignedAxis(desc.GetNumDimensions(), desc.GetAxis()));
+ }
+ else
+ {
+ for (unsigned int i = 0; i < numSplit; ++i)
+ {
+ for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
+ {
+ if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
+ {
+ splitAxis.insert(dimIdx);
+ }
+ }
+ }
+ }
+ return splitAxis;
+}
+
} // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index 6350c2542c..0462df698f 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
+// Copyright © 2017-2024 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -279,4 +279,11 @@ std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo input
/// \return - A permutation vector that permutes the 2 last dimensions
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank);
+/// Calculates the axis values for split operation.
+///
+/// \param desc - Splitter Descriptor
+/// \param input - Input tensor shape
+/// \return - A set containing axis values of slitter operation
+ std::set<unsigned int> ComputeSplitAxis(const armnn::SplitterDescriptor& desc, const TensorShape& input);
+
} //namespace armnn
diff --git a/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
index bc9a94289b..f53f97ae88 100644
--- a/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022, 2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -49,46 +49,51 @@ armnn::INetworkPtr CreateConstConvolution2dNetwork(const armnn::Convolution2dDes
return network;
}
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnIType, DataType ArmnnWType = ArmnnIType, DataType ArmnnBType = ArmnnIType,
+ DataType ArmnnOType = ArmnnIType>
void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
armnn::DataLayout dataLayout,
bool biasEnabled = true)
{
using namespace armnn;
+ using IT = ResolveType<ArmnnIType>;
+ using WT = ResolveType<ArmnnWType>;
+ using BT = ResolveType<ArmnnBType>;
+ using OT = ResolveType<ArmnnOType>;
- const float qScale = IsQuantizedType<T>() ? 0.25f : 1.0f;
- const int32_t qOffset = IsQuantizedType<T>() ? 50 : 0;
+ const float qScale = 1.0f;
+ const int32_t qOffset = IsQuantizedType<IT>() ? 10 : 0; // offset must be zero for non-quantized types
- TensorInfo inputInfo({ 1, 5, 5, 1 }, ArmnnType, qScale, qOffset, true);
- TensorInfo outputInfo({ 1, 3, 3, 1 }, ArmnnType, qScale, qOffset);
- TensorInfo weightsInfo({ 1, 3, 3, 1 }, ArmnnType, qScale, qOffset, true);
- TensorInfo biasesInfo({ 1 }, ArmnnType, qScale * qScale, 0, true);
+ TensorInfo inputInfo( { 1, 5, 5, 1 }, ArmnnIType, qScale, qOffset, true);
+ TensorInfo weightsInfo({ 1, 3, 3, 1 }, ArmnnWType, qScale, qOffset, true);
+ TensorInfo biasesInfo( { 1 }, ArmnnBType, qScale * qScale, 0, true);
+ TensorInfo outputInfo( { 1, 3, 3, 1 }, ArmnnOType, qScale, qOffset);
std::vector<float> inputData =
- {
- 1.0f, 5.0f, 2.0f, 3.0f, 5.0f,
- 8.0f, 7.0f, 3.0f, 6.0f, 3.0f,
- 3.0f, 3.0f, 9.0f, 1.0f, 9.0f,
- 4.0f, 1.0f, 8.0f, 1.0f, 3.0f,
- 6.0f, 8.0f, 1.0f, 9.0f, 2.0f
- };
+ {
+ 1, 5, 2, 3, 5,
+ 8, 7, 3, 6, 3,
+ 3, 3, 9, 1, 9,
+ 4, 1, 8, 1, 3,
+ 6, 8, 1, 9, 2
+ };
std::vector<float> weightsData =
- {
- 4.0f, 5.0f, 6.0f,
- 0.0f, 0.0f, 0.0f,
- 3.0f, 2.0f, 1.0f
- };
+ {
+ 4, 5, 6,
+ 0, 0, 0,
+ 3, 2, 1
+ };
- std::vector<float> biasesData = { 1.0f };
+ std::vector<float> biasesData = { 1 };
+ float bias = biasEnabled ? biasesData[0] : 0;
- float bias = biasEnabled ? biasesData[0] : 0.0f;
std::vector<float> expectedOutputData =
- {
- 65.0f + bias, 76.0f + bias, 91.0f + bias,
- 107.0f + bias, 99.0f + bias, 89.0f + bias,
- 116.0f + bias, 98.0f + bias, 118.0f + bias,
- };
+ {
+ 65 + bias, 76 + bias, 91 + bias,
+ 107 + bias, 99 + bias, 89 + bias,
+ 116 + bias, 98 + bias, 118 + bias
+ };
Convolution2dDescriptor descriptor;
descriptor.m_PadLeft = 0;
@@ -102,16 +107,16 @@ void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
if (dataLayout == DataLayout::NCHW)
{
- PermuteTensorNhwcToNchw(inputInfo, inputData);
+ PermuteTensorNhwcToNchw(inputInfo, inputData);
PermuteTensorNhwcToNchw(weightsInfo, weightsData);
- PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
+ PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
}
- // Quantize data
- std::vector<T> qInputData = armnnUtils::QuantizedVector<T>(inputData, qScale, qOffset);
- std::vector<T> qWeightsData = armnnUtils::QuantizedVector<T>(weightsData, qScale, qOffset);
- std::vector<T> qExpectedOutputData = armnnUtils::QuantizedVector<T>(expectedOutputData, qScale, qOffset);
- std::vector<T> qBiasesData = armnnUtils::QuantizedVector<T>(biasesData, qScale * qScale, 0);
+ // Convert data
+ std::vector<IT> qInputData = armnnUtils::QuantizedVector<IT>(inputData, qScale, qOffset);
+ std::vector<WT> qWeightsData = armnnUtils::QuantizedVector<WT>(weightsData, qScale, qOffset);
+ std::vector<BT> qBiasesData = armnnUtils::QuantizedVector<BT>(biasesData, qScale * qScale, 0);
+ std::vector<OT> qExpectedOutputData = armnnUtils::QuantizedVector<OT>(expectedOutputData, qScale, qOffset);
ConstTensor weights(weightsInfo, qWeightsData);
ConstTensor biases(biasesInfo, qBiasesData);
@@ -125,10 +130,10 @@ void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
biases,
biasEnabled);
- EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network),
- {{ 0, qInputData }},
- {{ 0, qExpectedOutputData }},
- backends);
+ EndToEndLayerTestImpl<ArmnnIType, ArmnnOType>(std::move(network),
+ {{ 0, qInputData }},
+ {{ 0, qExpectedOutputData }},
+ backends);
}
} // anonymous namespace
diff --git a/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
index f5c2eea601..3039b9b5a3 100644
--- a/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -105,4 +105,24 @@ void QuantizationEndToEndFloat16(const std::vector<armnn::BackendId>& backends)
qOffset);
};
+inline void QuantizationEndToEndInt8(const std::vector<armnn::BackendId>& backends)
+{
+ using namespace armnn;
+
+ const TensorShape tensorShape({ 1, 1, 1, 5 });
+
+ std::vector<int8_t> inputData = { 113, 16, 13, 101, 13 };
+ std::vector<int8_t> expectedOutputData = { 127, 45, 41, 127, 41 };
+
+ float qScale = 0.75f;
+ int32_t qOffset = 24;
+
+ QuantizeEndToEndLayerTestImpl<DataType::QSymmS8, DataType::QSymmS8>(backends,
+ tensorShape,
+ inputData,
+ expectedOutputData,
+ qScale,
+ qOffset);
+};
+
} \ No newline at end of file
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index bfe4f6e9fd..9f7d562df6 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -18,6 +18,7 @@
#if defined(ARMCOMPUTECL_ENABLED)
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
#include "workloads/ClAbsWorkload.hpp"
#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClActivationWorkload.hpp"
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index ec904eb51b..074ce5db72 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2019-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -11,6 +11,7 @@
#include <aclCommon/ArmComputeUtils.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/backends/TensorHandle.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
#include <cl/ClTensorHandle.hpp>
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index ee8f6f28f0..0298c7c552 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -19,6 +19,7 @@
#if defined(ARMCOMPUTENEON_ENABLED)
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
#include "workloads/NeonAbsWorkload.hpp"
#include "workloads/NeonAdditionWorkload.hpp"
#include "workloads/NeonActivationWorkload.hpp"
diff --git a/src/backends/neon/workloads/NeonSplitterWorkload.cpp b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
index c307822325..bfde497640 100644
--- a/src/backends/neon/workloads/NeonSplitterWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2019-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -12,6 +12,7 @@
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/backends/TensorHandle.hpp>
#include <neon/NeonTensorHandle.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
#include "NeonWorkloadUtils.hpp"
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 73786b5ccd..68b7fbff90 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -626,6 +626,13 @@ TEST_CASE("RefConvolution2dFloat16Test")
Convolution2dEndToEnd<armnn::DataType::Float16>(defaultBackends, armnn::DataLayout::NHWC);
}
+TEST_CASE("RefConvolution2dInt8Test")
+{
+ Convolution2dEndToEnd<armnn::DataType::QSymmS8,
+ armnn::DataType::QSymmS8,
+ armnn::DataType::Signed32>(defaultBackends, armnn::DataLayout::NHWC);
+}
+
TEST_CASE("RefConvolution3dFloat32Test")
{
Convolution3dEndToEnd<armnn::DataType::Float32, armnn::DataType::Float32>(defaultBackends,
@@ -818,6 +825,12 @@ TEST_CASE("DepthToSpaceEndToEndNhwcInt16")
DepthToSpaceEndToEnd<armnn::DataType::QSymmS16>(defaultBackends, armnn::DataLayout::NHWC);
}
+// Quantization
+TEST_CASE("RefQuantizeInt8")
+{
+ QuantizationEndToEndInt8(defaultBackends);
+}
+
// Dequantize
TEST_CASE("DequantizeEndToEndSimpleTest")
{
diff --git a/src/backends/tosaCommon/TosaMappings.cpp b/src/backends/tosaCommon/TosaMappings.cpp
index 1ebb68b3c9..0e44d54aab 100644
--- a/src/backends/tosaCommon/TosaMappings.cpp
+++ b/src/backends/tosaCommon/TosaMappings.cpp
@@ -127,7 +127,7 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer,
}
}
-TosaSerializationBasicBlock* GetTosaMappingFromLayer(Layer* layer)
+TosaSerializationBasicBlock* GetTosaMappingFromLayer(const Layer* layer)
{
std::vector<const TensorInfo*> inputs;
for (auto inputSlot : layer->GetInputSlots())
diff --git a/src/backends/tosaCommon/TosaMappings.hpp b/src/backends/tosaCommon/TosaMappings.hpp
index cc41f1b7c8..fe1ba3a077 100644
--- a/src/backends/tosaCommon/TosaMappings.hpp
+++ b/src/backends/tosaCommon/TosaMappings.hpp
@@ -27,4 +27,4 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer,
// Function called in armnn::OptimizeSubgraphView() when access to armnn::Layer is available
// and there is an option to set TOSA basic block data from constant layer tensors available from the input layer.
-TosaSerializationBasicBlock* GetTosaMappingFromLayer(Layer* layer);
+TosaSerializationBasicBlock* GetTosaMappingFromLayer(const Layer* layer);
diff --git a/src/backends/tosaCommon/operatorMappings/ActivationOperator.cpp b/src/backends/tosaCommon/operatorMappings/ActivationOperator.cpp
index c3e424ea83..c13555da6a 100644
--- a/src/backends/tosaCommon/operatorMappings/ActivationOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ActivationOperator.cpp
@@ -29,7 +29,7 @@ TosaSerializationBasicBlock* ConvertActivationToTosaOperator(const Layer* layer,
throw armnn::Exception("ConvertActivationToTosaOperator: 1 output tensor required.");
}
- std::string inputName = std::string("input0_");
+ std::string inputName = std::string("input_");
std::string outputNameAlpha = std::string("intermediate1_") + GetUniqueTosaMappingID();
std::string outputNameMul = std::string("intermediate2_") + GetUniqueTosaMappingID();
std::string outputName = std::string("output0_");
@@ -39,12 +39,8 @@ TosaSerializationBasicBlock* ConvertActivationToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if (layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensors names.
- Layer& connectedInputLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- inputName = GenerateUniqueName(connectedInputLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
std::vector<TosaSerializationTensor*> tensors;
@@ -54,7 +50,7 @@ TosaSerializationBasicBlock* ConvertActivationToTosaOperator(const Layer* layer,
// There also can't be duplicate tensor.
std::vector<int32_t> inputShape0;
DType inputDType0 = DType::DType_UNKNOWN;
- if(inputName.find("input0_") != std::string::npos)
+ if(inputName.find("input_") != std::string::npos)
{
inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
@@ -68,7 +64,8 @@ TosaSerializationBasicBlock* ConvertActivationToTosaOperator(const Layer* layer,
#if TOSA_COMPAT_VERSION(0, 60, 0)
std::string outputNameMAXMIN= std::string("intermediate3_") + GetUniqueTosaMappingID();
- if (inputDType0 == DType::DType_FP32)
+ if (inputDType0 == DType::DType_FP32 ||
+ inputDType0 == DType::DType_FP16)
{
// const_alpha
TosaSerializationOperator* alphaOp = nullptr;
@@ -125,49 +122,43 @@ TosaSerializationBasicBlock* ConvertActivationToTosaOperator(const Layer* layer,
std::string outputNameRescaleIdentity = std::string("intermediate4_") + GetUniqueTosaMappingID();
std::string outputNameRescaleMaxMin = std::string("intermediate5_") + GetUniqueTosaMappingID();
- DType rescale_type = DType::DType_INT32;
- float alpha = activationDescriptor->m_A;
- double scale_alpha = inputs[0]->GetQuantizationScale() * alpha / outputs[0]->GetQuantizationScale();
- double scale_identity = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
- int32_t input_zp = inputs[0]->GetQuantizationOffset();
- int32_t output_zp = outputs[0]->GetQuantizationOffset();
+ DType rescale_type = DType::DType_INT32;
+ float alpha = activationDescriptor->m_A;
+ double scale_alpha = inputs[0]->GetQuantizationScale() * alpha / outputs[0]->GetQuantizationScale();
+ double scale_identity = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
+ int32_t input_zp = inputs[0]->GetQuantizationOffset();
+ int32_t output_zp = outputs[0]->GetQuantizationOffset();
// Value op_rescale_alpha_in =
// buildRescale(rewriter, op, rescale_type, input, scale_alpha,
// input_qtype.getZeroPoint(), 0, true, true);
TosaSerializationOperator* rescaleAlphaOp = nullptr;
- TosaSerializationTensor* rescaleAlphaTensor = nullptr;
CreateRescaleTosaOperator(inputName,
outputNameRescaleAlpha,
- rescale_type,
- inputShape0,
scale_alpha,
input_zp,
0,
true,
true,
- &rescaleAlphaOp,
- &rescaleAlphaTensor);
- tensors.push_back(rescaleAlphaTensor);
-
+ &rescaleAlphaOp);
+ tensors.push_back(new TosaSerializationTensor(outputNameRescaleAlpha,
+ GetTosaTensorShape(inputs[0]->GetShape()),
+ rescale_type, {}));
// Value op_rescale_identity_in =
// buildRescale(rewriter, op, rescale_type, input, scale_identity,
// input_qtype.getZeroPoint(), 0, true, true);
TosaSerializationOperator* rescaleIdentityOp = nullptr;
- TosaSerializationTensor* rescaleIdentityTensor = nullptr;
CreateRescaleTosaOperator(inputName,
outputNameRescaleIdentity,
- rescale_type,
- inputShape0,
scale_identity,
input_zp,
0,
true,
true,
- &rescaleIdentityOp,
- &rescaleIdentityTensor);
- tensors.push_back(rescaleIdentityTensor);
-
+ &rescaleIdentityOp);
+ tensors.push_back(new TosaSerializationTensor(outputNameRescaleIdentity,
+ GetTosaTensorShape(inputs[0]->GetShape()),
+ rescale_type, {}));
// Value result_int32;
// if (alpha <= 1.0) {
// auto max_op = CreateOpAndInfer<tosa::MaximumOp>(
@@ -198,19 +189,18 @@ TosaSerializationBasicBlock* ConvertActivationToTosaOperator(const Layer* layer,
{outputNameRescaleMaxMin});
}
- tensors.push_back(new TosaSerializationTensor(outputNameRescaleMaxMin, inputShape0, rescale_type, {}));
+ tensors.push_back(new TosaSerializationTensor(outputNameRescaleMaxMin,
+ GetTosaTensorShape(inputs[0]->GetShape()),
+ rescale_type, {}));
// Value output = buildRescaleFromInt32(rewriter, op, output_type, result_int32,
// 1.0, output_qtype.getZeroPoint());
TosaSerializationOperator* rescaleOutputOp = nullptr;
CreateFromInt32RescaleTosaOperator(outputNameRescaleMaxMin,
outputName,
- outputDType0,
- outputShape0,
1.0,
output_zp,
- &rescaleOutputOp,
- nullptr);
+ &rescaleOutputOp);
// operatorInputNames/operatorOutputNames ends up being the same as
// blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings
diff --git a/src/backends/tosaCommon/operatorMappings/AvgPool2DIgnoreValueOperator.cpp b/src/backends/tosaCommon/operatorMappings/AvgPool2DIgnoreValueOperator.cpp
index a7ca873831..bd198e2d5a 100644
--- a/src/backends/tosaCommon/operatorMappings/AvgPool2DIgnoreValueOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/AvgPool2DIgnoreValueOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -10,7 +10,7 @@ TosaSerializationBasicBlock* ConvertAvgPool2DIgnoreValueToTosaOperator(const Lay
const std::vector<const TensorInfo*>& outputs,
const Pooling2dDescriptor* poolDescriptor)
{
- std::string padInputName = std::string("input0_");
+ std::string padInputName = std::string("input_");
std::string padOutputName = std::string("intermediate0_") + GetUniqueTosaMappingID();
std::string poolOutputName = std::string("output0_");
std::string blockName = std::string("Op_AVG_POOL2D_block_") + GetUniqueTosaMappingID();
@@ -19,12 +19,8 @@ TosaSerializationBasicBlock* ConvertAvgPool2DIgnoreValueToTosaOperator(const Lay
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensors names.
- Layer& connectedInputLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- padInputName = GenerateUniqueName(connectedInputLayer, 0);
-
- // Determine unique output tensor name.
- poolOutputName = GenerateUniqueOutputName(*layer, 0);
+ padInputName = GenerateUniqueInputName(layer->GetInputSlot(0));
+ poolOutputName = GenerateUniqueOutputName(*layer);
}
std::vector<int> paddings;
@@ -81,7 +77,7 @@ TosaSerializationBasicBlock* ConvertAvgPool2DIgnoreValueToTosaOperator(const Lay
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(padInputName.find("input0_") != std::string::npos)
+ if(padInputName.find("input_") != std::string::npos)
{
tensors.push_back(new TosaSerializationTensor(padInputName, inputShape, inputDType, {}));
}
diff --git a/src/backends/tosaCommon/operatorMappings/ConcatOperator.cpp b/src/backends/tosaCommon/operatorMappings/ConcatOperator.cpp
index d1ff0dfb20..905f32c4c4 100644
--- a/src/backends/tosaCommon/operatorMappings/ConcatOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ConcatOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -21,7 +21,7 @@ TosaSerializationBasicBlock* ConvertConcatToTosaOperator(const Layer* layer,
{
for (uint32_t i = 0; i < numInputs; ++i)
{
- inputNames.push_back("input"+ std::to_string(i) +"_");
+ inputNames.push_back("input_"+ std::to_string(i));
}
}
// If a layer is present then the block will be used for execution, so input and output names need to be determined
@@ -31,14 +31,12 @@ TosaSerializationBasicBlock* ConvertConcatToTosaOperator(const Layer* layer,
// Get the layers connected to the input slots and determine unique tensor names.
for (uint32_t i = 0; i < numInputs; ++i)
{
- Layer& connectedLayer = layer->GetInputSlot(i).GetConnectedOutputSlot()->GetOwningLayer();
-
- std::string inputName = GenerateUniqueName(connectedLayer, i);
+ std::string inputName = GenerateUniqueInputName(layer->GetInputSlot(i));
inputNames.push_back(inputName);
}
// Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ outputName = GenerateUniqueOutputName(*layer);
}
auto axis = static_cast<int32_t>(concatDescriptor->GetConcatAxis());
@@ -51,8 +49,7 @@ TosaSerializationBasicBlock* ConvertConcatToTosaOperator(const Layer* layer,
{outputName});
std::vector<TosaSerializationTensor*> tensors;
- tensors.reserve(numInputs);
-
+ tensors.reserve(numInputs + 1);
for (uint32_t i = 0; i < numInputs; ++i)
{
// Only add input tensors for validation or when the connected layer is an input layer.
diff --git a/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp b/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
index c65f1891da..6d1699d87b 100644
--- a/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
@@ -4,6 +4,8 @@
//
#include "Conv2dOperator.hpp"
+#include "TosaRescaleOperatorUtils.hpp"
+#include <ResolveType.hpp>
TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
const std::vector<const TensorInfo*>& inputs,
@@ -14,14 +16,17 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_CONV2D_block_") + GetUniqueTosaMappingID();
+ DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
+ DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
+
// Set input names for validation purposes only.
if(layer == nullptr)
{
- inputNames.emplace_back("input0_");
- inputNames.emplace_back("input1_");
+ inputNames.emplace_back("input_0");
+ inputNames.emplace_back("input_1");
if(conv2dDescriptor->m_BiasEnabled)
{
- inputNames.emplace_back("input2_");
+ inputNames.emplace_back("input_2");
}
}
// If a layer is present then the block will be used for execution, so input and output names need to be
@@ -32,14 +37,12 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
// Get the layer connected to the input slot and determine unique tensor names.
for (uint32_t i = 0; i < inputs.size(); ++i)
{
- Layer& connectedLayer = layer->GetInputSlot(i).GetConnectedOutputSlot()->GetOwningLayer();
-
- std::string inputName = GenerateUniqueName(connectedLayer, i);
+ std::string inputName = GenerateUniqueInputName(layer->GetInputSlot(i));
inputNames.push_back(inputName);
}
// Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ outputName = GenerateUniqueOutputName(*layer);
}
std::vector<TosaSerializationTensor*> tensors;
@@ -49,10 +52,9 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
// Only add tensor if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensors.
- if(inputNames[0].find("input0_") != std::string::npos)
+ if(inputNames[0].find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
- DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
tensors.push_back(new TosaSerializationTensor(inputNames[0], inputShape0, inputDType0, {}));
}
@@ -87,23 +89,32 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
// The size of the bias must match the channels dimension, so get the correct index.
unsigned int index = (conv2dDescriptor->m_DataLayout == DataLayout::NHWC) ? 3 : 1;
- std::vector<uint8_t> uint8Data;
- std::vector<float> data(outputs[0]->GetShape()[index], 0.0f);
+ const DType dType = (inputDType0 == DType_INT8) ? DType_INT32 : outputDType0;
+ std::vector<float> data(outputs[0]->GetShape()[index], 0);
+ std::vector<uint8_t> uint8Data;
TosaSerializationHandler::ConvertF32toU8(data, uint8Data);
tensors.push_back(new TosaSerializationTensor(constantName,
{static_cast<int32_t>(outputs[0]->GetShape()[index])},
- DType_FP32,
+ dType,
uint8Data));
inputNames.emplace_back(constantName);
}
// Setup Output Tensor
- std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
- DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
-
- tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+ std::vector<int32_t> outputShape0 = {GetTosaTensorShape(outputs[0]->GetShape())};
+ std::string outputConv2dName;
+ bool isInputInt8 = (inputDType0 == DType_INT8);
+ if (isInputInt8)
+ {
+ outputConv2dName = std::string("intermediate0_") + GetUniqueTosaMappingID();
+ tensors.push_back(new TosaSerializationTensor(outputConv2dName, outputShape0, DType_INT32, {}));
+ }
+ else
+ {
+ tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+ }
// Set up CONV2D operator
std::vector<int> pad = {static_cast<int>(conv2dDescriptor->m_PadTop),
@@ -114,14 +125,42 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
static_cast<int>(conv2dDescriptor->m_StrideX)};
std::vector<int> dilation = {static_cast<int>(conv2dDescriptor->m_DilationY),
static_cast<int>(conv2dDescriptor->m_DilationX)};
- TosaConvAttribute attribute(pad, stride, dilation, 0, 0, false); // input_zp, weight_zp, local_bound
-
- auto* op = new TosaSerializationOperator(Op_CONV2D,
- Attribute_ConvAttribute,
- &attribute,
- inputNames,
- {outputName});
- operators.push_back(op);
+ TosaConvAttribute attribute(pad, stride, dilation,
+ inputs[0]->GetQuantizationOffset(), // input_zp
+ inputs[1]->GetQuantizationOffset(), // weight_zp
+ false); // local_bound
+
+ std::string& convOutStr = isInputInt8 ? outputConv2dName : outputName;
+ auto* conv2d_op = new TosaSerializationOperator(Op_CONV2D,
+ Attribute_ConvAttribute,
+ &attribute,
+ inputNames,
+ {convOutStr});
+ operators.push_back(conv2d_op);
+
+ if (isInputInt8)
+ {
+ int32_t output_zp = outputs[0]->GetQuantizationOffset();
+ double output_scale = outputs[0]->GetQuantizationScales()[0];
+ double input_scale = inputs[0]->GetQuantizationScales()[0];
+ const std::vector<float>& weight_scales = inputs[1]->GetQuantizationScales();
+
+ TosaSerializationOperator* rescaleOp = nullptr;
+ CreateRescaleTosaOperatorPerChannel(outputConv2dName,
+ outputName,
+ 0,
+ output_zp,
+ true,
+ true,
+ input_scale,
+ output_scale,
+ weight_scales,
+ &rescaleOp);
+ operators.push_back(rescaleOp);
+ tensors.push_back(new TosaSerializationTensor(outputName,
+ outputShape0,
+ DType_INT8, {}));
+ }
// operatorInputNames/operatorOutputNames ends up being the same as
// blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
index a9af249673..55b4f15e49 100644
--- a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
@@ -11,8 +11,8 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer*
const std::vector<const TensorInfo*>& outputs,
const ElementwiseBinaryDescriptor* descriptor)
{
- std::string input0Name = std::string("input0_");
- std::string input1Name = std::string("input1_");
+ std::string input0Name = std::string("input_0");
+ std::string input1Name = std::string("input_1");
std::string outputName = std::string("output0_");
std::string blockName;
@@ -20,15 +20,9 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer*
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- input0Name = GenerateUniqueName(connectedLayer0, 0);
-
- Layer& connectedLayer1 = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
- input1Name = GenerateUniqueName(connectedLayer1, 1);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ input0Name = GenerateUniqueInputName(layer->GetInputSlot(0));
+ input1Name = GenerateUniqueInputName(layer->GetInputSlot(1));
+ outputName = GenerateUniqueOutputName(*layer);
}
TosaSerializationOperator* op = nullptr;
@@ -93,13 +87,13 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer*
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(input0Name.find("input0_") != std::string::npos)
+ if(input0Name.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {}));
}
- if(input1Name.find("input1_") != std::string::npos)
+ if(input1Name.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.cpp
index 02dddab8bc..d0eac0b4f4 100644
--- a/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.cpp
@@ -10,7 +10,7 @@ TosaSerializationBasicBlock* ConvertElementwiseUnaryOperator(const Layer* layer,
const std::vector<const TensorInfo*>& outputs,
const ElementwiseUnaryDescriptor* unaryDescriptor)
{
- std::string input0Name = std::string("input0_");
+ std::string input0Name = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_ELEMENTWISEUNARY_block_") + GetUniqueTosaMappingID();
@@ -19,12 +19,8 @@ TosaSerializationBasicBlock* ConvertElementwiseUnaryOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layer connected to the input slot and determine unique the tensor name.
- Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- input0Name = GenerateUniqueName(connectedLayer0, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ input0Name = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
TosaSerializationOperator* op = nullptr;
@@ -48,7 +44,7 @@ TosaSerializationBasicBlock* ConvertElementwiseUnaryOperator(const Layer* layer,
// Only add input tensor if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(input0Name.find("input0_") != std::string::npos)
+ if(input0Name.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
diff --git a/src/backends/tosaCommon/operatorMappings/Pooling2DOperator.cpp b/src/backends/tosaCommon/operatorMappings/Pooling2DOperator.cpp
index c33f61296a..56e3f3402c 100644
--- a/src/backends/tosaCommon/operatorMappings/Pooling2DOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/Pooling2DOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -13,7 +13,7 @@ TosaSerializationBasicBlock* ConvertPooling2DToTosaOperator(const Layer* layer,
std::string poolType = (poolDescriptor->m_PoolType == PoolingAlgorithm::Max) ? "Op_MAX" : "Op_AVG";
Op opcode = (poolDescriptor->m_PoolType == PoolingAlgorithm::Max) ? Op_MAX_POOL2D : Op_AVG_POOL2D;
- std::string input0Name = std::string("input0_");
+ std::string input0Name = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_") + poolType + std::string("_POOL2D_block_") + GetUniqueTosaMappingID();
@@ -21,12 +21,8 @@ TosaSerializationBasicBlock* ConvertPooling2DToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedInputLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- input0Name = GenerateUniqueName(connectedInputLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ input0Name = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
std::vector<int> pad = {static_cast<int>(poolDescriptor->m_PadTop),
@@ -50,7 +46,7 @@ TosaSerializationBasicBlock* ConvertPooling2DToTosaOperator(const Layer* layer,
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(input0Name.find("input0_") != std::string::npos)
+ if(input0Name.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
diff --git a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
index 1107add6e9..a4d7d0ed28 100644
--- a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
@@ -8,6 +8,8 @@
#include "QuantizeOperator.hpp"
+#include "TosaRescaleOperatorUtils.hpp"
+
// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
@@ -19,11 +21,7 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
"ConvertQuantizeToTosaOperator: Quantize must have only one output" );
- std::string inputName = std::string("input0_");
- std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();
- std::string outputNameScale = std::string("intermediate1_") + GetUniqueTosaMappingID();
- std::string outputNameMul = std::string("intermediate2_") + GetUniqueTosaMappingID();
- std::string outputNameAdd = std::string("intermediate3_") + GetUniqueTosaMappingID();
+ std::string inputName = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
@@ -31,12 +29,8 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- inputName = GenerateUniqueName(connectedLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
const TensorInfo inputInfo = *inputs[0];
@@ -55,85 +49,119 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
std::vector<TosaSerializationTensor*> tensors;
+ std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
+ DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
+ bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
+
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- std::vector<int32_t> inputShape0;
- DType inputDType0 = DType::DType_UNKNOWN;
- if(inputName.find("input0_") != std::string::npos)
+ if(inputName.find("input_") != std::string::npos)
{
- inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
- inputDType0 = ArmNNToDType(inputInfo.GetDataType());
- ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,
- "ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );
tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
}
std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
- // quantize:
- // const_zeroPoint = constant(zeroPoint)
- // const_scale = constant(scale)
- // out_mul = mul(input, const_scale)
- // out_add = add(out_mul, const_zeroPoint)
- // output = cast<output_type>(out_add)
-
- // const_zeroPoint
- TosaSerializationOperator* zeroPointOp = nullptr;
- TosaSerializationTensor* zeroPointTensor = nullptr;
- CreateConstTosaOperator<float>(outputNameZeroPoint,
- zeroPoint,
- inputDType0,
- inputShape0,
- zeroPointOp,
- zeroPointTensor);
- tensors.push_back(zeroPointTensor);
-
- // const_scale
- TosaSerializationOperator *scaleOp = nullptr;
- TosaSerializationTensor* scaleTensor = nullptr;
- CreateConstTosaOperator<float>(outputNameScale,
- scale,
- inputDType0,
- inputShape0,
- scaleOp,
- scaleTensor);
- tensors.push_back(scaleTensor);
-
- // mul
- int32_t shift = 0;
- TosaMulAttribute mulAttribute(shift);
- TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
- Attribute_MulAttribute,
- &mulAttribute,
- {inputName, outputNameScale},
- {outputNameMul});
- tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
-
- // add
- TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
- Attribute_NONE,
- nullptr,
- {outputNameMul, outputNameZeroPoint},
- {outputNameAdd});
- tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
-
- // cast
- TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
- Attribute_NONE,
- nullptr,
- {outputNameAdd},
- {outputName});
-
- tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
-
- // operatorInputNames/operatorOutputNames ends up being the same as
- // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
- return new TosaSerializationBasicBlock(blockName, // name
- mainName, // region name
- {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
- tensors, // tensors
- {inputName}, // inputs
- {outputName}); // outputs
-}
+ if (isFloatInput)
+ {
+ // quantize:
+ // const_zeroPoint = constant(zeroPoint)
+ // const_scale = constant(scale)
+ // out_mul = mul(input, const_scale)
+ // out_add = add(out_mul, const_zeroPoint)
+ // output = cast<output_type>(out_add)
+
+ std::string outputNameScale = std::string("input1_") + GetUniqueTosaMappingID();
+ std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
+ std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
+ std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
+
+ // const_zeroPoint
+ TosaSerializationOperator* zeroPointOp = nullptr;
+ TosaSerializationTensor* zeroPointTensor = nullptr;
+ CreateConstTosaOperator<float>(outputNameZeroPoint,
+ zeroPoint,
+ inputDType0,
+ inputShape0,
+ zeroPointOp,
+ zeroPointTensor);
+ tensors.push_back(zeroPointTensor);
+
+ // const_scale
+ TosaSerializationOperator *scaleOp = nullptr;
+ TosaSerializationTensor* scaleTensor = nullptr;
+ CreateConstTosaOperator<float>(outputNameScale,
+ scale,
+ inputDType0,
+ inputShape0,
+ scaleOp,
+ scaleTensor);
+ tensors.push_back(scaleTensor);
+
+ // mul
+ int32_t shift = 0;
+ TosaMulAttribute mulAttribute(shift);
+ TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
+ Attribute_MulAttribute,
+ &mulAttribute,
+ {inputName, outputNameScale},
+ {outputNameMul});
+ tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
+
+ // add
+ TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
+ Attribute_NONE,
+ nullptr,
+ {outputNameMul, outputNameZeroPoint},
+ {outputNameAdd});
+ tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
+
+ // cast
+ TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
+ Attribute_NONE,
+ nullptr,
+ {outputNameAdd},
+ {outputName});
+
+ tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+
+ // operatorInputNames/operatorOutputNames ends up being the same as
+ // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+ return new TosaSerializationBasicBlock(blockName, // name
+ mainName, // region name
+ {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
+ tensors, // tensors
+ {inputName}, // inputs
+ {outputName}); // outputs
+ }
+ else
+ {
+ double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
+ int32_t input_zp = inputs[0]->GetQuantizationOffset();
+ int32_t output_zp = outputs[0]->GetQuantizationOffset();
+
+ TosaSerializationOperator* rescaleOp = nullptr;
+ CreateRescaleTosaOperator(inputName,
+ outputName,
+ scale_alpha,
+ input_zp,
+ output_zp,
+ true,
+ true,
+ &rescaleOp);
+ tensors.push_back(new TosaSerializationTensor(outputName,
+ inputShape0,
+ outputDType0, {}));
+
+ // operatorInputNames/operatorOutputNames ends up being the same as
+ // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+ return new TosaSerializationBasicBlock(blockName, // name
+ mainName, // region name
+ {rescaleOp}, // operators
+ tensors, // tensors
+ {inputName}, // inputs
+ {outputName}); // outputs
+ }
+} \ No newline at end of file
diff --git a/src/backends/tosaCommon/operatorMappings/ReshapeOperator.cpp b/src/backends/tosaCommon/operatorMappings/ReshapeOperator.cpp
index 55d66806b7..e7e5dc77d9 100644
--- a/src/backends/tosaCommon/operatorMappings/ReshapeOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ReshapeOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -10,7 +10,7 @@ TosaSerializationBasicBlock* ConvertReshapeToTosaOperator(const Layer* layer,
const std::vector<const TensorInfo*>& outputs,
const ReshapeDescriptor* reshapeDescriptor)
{
- std::string inputName = std::string("input0_");
+ std::string inputName = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_RESHAPE_block_") + GetUniqueTosaMappingID();
@@ -18,12 +18,8 @@ TosaSerializationBasicBlock* ConvertReshapeToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- inputName = GenerateUniqueName(connectedLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
TosaReshapeAttribute attribute(GetTosaTensorShape(reshapeDescriptor->m_TargetShape));
@@ -39,7 +35,7 @@ TosaSerializationBasicBlock* ConvertReshapeToTosaOperator(const Layer* layer,
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(inputName.find("input0_") != std::string::npos)
+ if(inputName.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType = ArmNNToDType(inputs[0]->GetDataType());
diff --git a/src/backends/tosaCommon/operatorMappings/ResizeOperator.cpp b/src/backends/tosaCommon/operatorMappings/ResizeOperator.cpp
index 72c7352a65..bb1eabd27b 100644
--- a/src/backends/tosaCommon/operatorMappings/ResizeOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ResizeOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020, 2023 The TensorFlow Authors. All Rights Reserved.
@@ -37,7 +37,7 @@ TosaSerializationBasicBlock* ConvertResizeToTosaOperator(const Layer* layer,
throw armnn::InvalidArgumentException("ConvertResizeToTosaOperator: Unsupported Resize method.");
}
- std::string inputName = std::string("input0_");
+ std::string inputName = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_RESIZE_block_") + GetUniqueTosaMappingID();
@@ -45,12 +45,8 @@ TosaSerializationBasicBlock* ConvertResizeToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- inputName = GenerateUniqueName(connectedLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
int32_t inputHeight = static_cast<int32_t>(inputs[0]->GetShape()[1]);
@@ -149,7 +145,7 @@ TosaSerializationBasicBlock* ConvertResizeToTosaOperator(const Layer* layer,
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(inputName.find("input0_") != std::string::npos)
+ if(inputName.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType = ArmNNToDType(inputs[0]->GetDataType());
diff --git a/src/backends/tosaCommon/operatorMappings/SliceOperator.cpp b/src/backends/tosaCommon/operatorMappings/SliceOperator.cpp
index 294d38937f..5fe0c8da46 100644
--- a/src/backends/tosaCommon/operatorMappings/SliceOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/SliceOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -10,7 +10,7 @@ TosaSerializationBasicBlock* ConvertSliceToTosaOperator(const Layer* layer,
const std::vector<const TensorInfo*>& outputs,
const SliceDescriptor* sliceDescriptor)
{
- std::string inputName = std::string("input0_");
+ std::string inputName = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_SLICE_block_") + GetUniqueTosaMappingID();
@@ -18,12 +18,8 @@ TosaSerializationBasicBlock* ConvertSliceToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- inputName = GenerateUniqueName(connectedLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
std::vector<int32_t> begin(sliceDescriptor->m_Begin.begin(), sliceDescriptor->m_Begin.end());
@@ -42,7 +38,7 @@ TosaSerializationBasicBlock* ConvertSliceToTosaOperator(const Layer* layer,
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(inputName.find("input0_") != std::string::npos)
+ if(inputName.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType = ArmNNToDType(inputs[0]->GetDataType());
diff --git a/src/backends/tosaCommon/operatorMappings/SplitOperator.cpp b/src/backends/tosaCommon/operatorMappings/SplitOperator.cpp
index f8c60b1b6d..53f4f052bb 100644
--- a/src/backends/tosaCommon/operatorMappings/SplitOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/SplitOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
@@ -7,6 +7,7 @@
//
#include "SplitOperator.hpp"
+#include <backendsCommon/WorkloadUtils.hpp>
// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertSplitOp
@@ -26,7 +27,7 @@ TosaSerializationBasicBlock* ConvertSplitToTosaOperator(const Layer* layer,
throw armnn::Exception("ConvertSplitToTosaOperator: Dynamic input dimensions are unsupported.");
}
- std::string inputName = std::string("input0_");
+ std::string inputName = std::string("input_");
std::vector<std::string> outputNames;
std::string blockName = std::string("Op_SPLIT_block_") + GetUniqueTosaMappingID();
@@ -35,9 +36,7 @@ TosaSerializationBasicBlock* ConvertSplitToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- inputName = GenerateUniqueName(connectedLayer, 0);
+ inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
for (unsigned int i=0; i < numSplit; ++i)
{
@@ -56,26 +55,19 @@ TosaSerializationBasicBlock* ConvertSplitToTosaOperator(const Layer* layer,
}
}
- // Each slice op has a different beginning point.
- // The size is the same for each slice op.
- std::vector<int32_t> beginVals;
- beginVals.reserve(inputs[0]->GetNumDimensions());
- std::vector<int32_t> sizeVals;
- sizeVals.reserve(inputs[0]->GetNumDimensions());
- for (unsigned int j = 0; j < inputs[0]->GetNumDimensions(); ++j)
+    // Derive the single split axis from the splitter descriptor and input shape.
+ std::set<unsigned int> splitAxis = ComputeSplitAxis(*splitDescriptor, inputs[0]->GetShape());
+ if (splitAxis.size() != 1)
{
- beginVals.emplace_back(0);
- uint32_t dim = inputs[0]->GetShape()[j];
- sizeVals.emplace_back(dim);
+ throw InvalidArgumentException("Cannot derive split axis from SplitterDescriptor");
}
-
- uint32_t axis = static_cast<uint32_t>(splitDescriptor->GetAxis());
- sizeVals[axis] = sizeVals[axis] / static_cast<int32_t>(numSplit);
+ uint32_t axis = *splitAxis.begin();
std::vector<TosaSerializationOperator*> ops;
- for (unsigned int i=0; i < numSplit; ++i)
+ std::vector<int32_t> beginVals(inputs[0]->GetNumDimensions(), 0);
+ for (unsigned int i = 0; i < numSplit; ++i)
{
- beginVals[axis] = static_cast<int>(i) * sizeVals[axis];
+ std::vector<int32_t> sizeVals = GetTosaTensorShape(outputs[i]->GetShape());
TosaSliceAttribute attribute(beginVals, sizeVals);
auto* op = new TosaSerializationOperator(Op_SLICE,
Attribute_SliceAttribute,
@@ -84,13 +76,16 @@ TosaSerializationBasicBlock* ConvertSplitToTosaOperator(const Layer* layer,
{outputNames[i]});
ops.push_back(op);
+
+
+        // Advance the begin offset along the split axis by this slice's size, so the next slice starts where this one ends.
}
std::vector<TosaSerializationTensor*> tensors;
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(inputName.find("input0_") != std::string::npos)
+ if(inputName.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType = ArmNNToDType(inputs[0]->GetDataType());
@@ -98,13 +93,13 @@ TosaSerializationBasicBlock* ConvertSplitToTosaOperator(const Layer* layer,
tensors.push_back(new TosaSerializationTensor(inputName, inputShape, inputDType, {}));
}
- std::vector<int32_t> outputShape = GetTosaTensorShape(outputs[0]->GetShape());
DType outputDType = ArmNNToDType(outputs[0]->GetDataType());
-
- for (unsigned int i=0; i < numSplit; ++i)
+ for (unsigned int i = 0; i < numSplit; ++i)
{
+ std::vector<int32_t> outputShape = GetTosaTensorShape(outputs[i]->GetShape());
tensors.push_back(new TosaSerializationTensor(outputNames[i], outputShape, outputDType, {}));
}
+
// operatorInputNames/operatorOutputNames ends up being the same as
// blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
return new TosaSerializationBasicBlock(blockName, // name
diff --git a/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
index 047e0a1f42..f566504a40 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
@@ -48,6 +48,33 @@ inline DType ArmNNToDType(const DataType& type)
}
}
+// Function to return ArmNN datatype from input Tosa datatype.
+inline DataType DtypeToArmNN(const DType type)
+{
+ switch (type)
+ {
+ case DType_FP16:
+ return DataType::Float16;
+ case DType_BF16:
+ return DataType::BFloat16;
+ case DType_FP32:
+ return DataType::Float32;
+ case DType_UINT8:
+ return DataType::QAsymmU8;
+ case DType_INT8:
+ return DataType::QSymmS8;
+ case DType_INT16:
+ return DataType::QSymmS16;
+ case DType_INT32:
+ return DataType::Signed32;
+ case DType_BOOL:
+ return DataType::Boolean;
+ default:
+ throw armnn::Exception("DtypeToArmNN: Unsupported tosa::DType in ArmNN.");
+ return DataType::Boolean;
+ }
+}
+
// Function to return Tosa tensor shape from input ArmNN tensor shape.
inline std::vector<int32_t> GetTosaTensorShape(const TensorShape& shape)
{
@@ -60,7 +87,7 @@ inline std::vector<int32_t> GetTosaTensorShape(const TensorShape& shape)
}
// Function that generates unique name using the layer type, input slot and layer guid.
-inline std::string GenerateUniqueName(const Layer& layer, uint32_t layerSlot)
+static std::string GenerateUniqueName(const Layer& layer, uint32_t layerSlot)
{
std::string guid = std::to_string(layer.GetGuid());
std::string slotAndGuid = std::to_string(layerSlot) + "_" + guid;
@@ -68,7 +95,7 @@ inline std::string GenerateUniqueName(const Layer& layer, uint32_t layerSlot)
switch (layer.GetType())
{
case LayerType::Input:
- return "input" + slotAndGuid;
+ return "input_" + guid;
case LayerType::Output:
return "output" + slotAndGuid;
case LayerType::Constant:
@@ -78,8 +105,19 @@ inline std::string GenerateUniqueName(const Layer& layer, uint32_t layerSlot)
}
}
+// Function that generates unique name for the parent layer from the child layer input slot.
+inline std::string GenerateUniqueInputName(const armnn::InputSlot& slot)
+{
+ // Get the layers connected to the input slots and determine unique tensor names.
+ Layer& connectedLayer = slot.GetConnectedOutputSlot()->GetOwningLayer();
+ // For layer input, we want to ensure we get the correct output slot of the parent layer.
+ // For example, if parent layer is split, the parent output slot could be 0 or 1 index.
+ uint32_t connectedOutputSlotIdx = slot.GetConnectedOutputSlot()->CalculateIndexOnOwner();
+ return GenerateUniqueName(connectedLayer, connectedOutputSlotIdx);
+}
+
// Function that generates unique output name using the layer type, input slot and layer guid.
-inline std::string GenerateUniqueOutputName(const Layer& layer, uint32_t layerSlot)
+inline std::string GenerateUniqueOutputName(const Layer& layer, uint32_t layerSlot = 0)
{
Layer& connectedLayer = layer.GetOutputSlot().GetConnection(0)->GetOwningLayer();
@@ -416,6 +454,12 @@ inline std::vector<uint8_t> CreateConstTosaData(const void* value,
error = TosaSerializationHandler::ConvertI8toU8(data, uint8Data);
break;
}
+ case DType::DType_UINT8:
+ {
+ const int8_t* copy_data = static_cast<const int8_t*>(value);
+ uint8Data.assign(copy_data, copy_data + numElements);
+ break;
+ }
case DType::DType_INT4:
{
std::vector<int8_t> data(numElements, *static_cast<const int8_t*>(value));
diff --git a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
index 1a4dd7aac3..c37d6519bb 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
@@ -9,31 +9,27 @@
inline void CreateRescaleTosaOperator(const std::string& inputName,
const std::string& outputName,
- DType output_type,
- const std::vector<int32_t>& shape,
- int32_t scale_multiplier,
- int32_t scale_shift,
+ const std::vector<int32_t>& multipliers,
+ const std::vector<int32_t>& shifts,
int32_t input_zp,
int32_t output_zp,
bool double_round,
bool scale32,
- TosaSerializationOperator** op,
- TosaSerializationTensor** tensor)
+ bool per_channel,
+ TosaSerializationOperator** op)
{
if (!op)
{
throw armnn::Exception("CreateRescaleTosaOperator: nullptr op");
}
- std::vector<int32_t> multipliers{scale_multiplier};
- std::vector<int32_t> shifts{scale_shift};
TosaRescaleAttribute attribute(input_zp,
output_zp,
multipliers,
shifts,
scale32,
double_round,
- false, // per_channel
+ per_channel,
false, // input_unsigned
false); // output_unsigned
@@ -43,90 +39,185 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
{
throw armnn::Exception("CreateRescaleTosaOperator: failed to created operator");
}
- if (tensor != nullptr)
- {
- // tensor
- *tensor = new TosaSerializationTensor(outputName, shape, output_type, {});
- if (! (*tensor))
- {
- throw armnn::Exception("CreateRescaleTosaOperator: failed to created tensor");
- }
- }
}
inline void CreateRescaleTosaOperator(const std::string& inputName,
const std::string& outputName,
- DType output_type,
- const std::vector<int32_t>& shape,
- double scale,
+ int32_t scale_multiplier,
+ int32_t scale_shift,
int32_t input_zp,
int32_t output_zp,
bool double_round,
bool scale32,
- TosaSerializationOperator** op,
- TosaSerializationTensor** tensor)
+ bool per_channel,
+ TosaSerializationOperator** op)
{
- // The code that follows is based on the behaviour specified in
- // https://www.mlplatform.org/tosa/tosa_spec.html#_precision_scaling
+ const std::vector<int32_t> multipliers{scale_multiplier};
+ const std::vector<int32_t> shifts{scale_shift};
+ CreateRescaleTosaOperator(inputName, outputName, multipliers, shifts,
+ input_zp, output_zp, double_round, scale32, per_channel, op);
+}
+
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values where
+/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
+/// multiplier = mantissa*2^shift for 32-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale32(double scale,
+ int32_t &multiplier,
+ int32_t &shift)
+{
+ const double mantissa = std::frexp(scale, &shift);
+ auto shiftedM = std::round(mantissa * (int64_t(1) << 31));
- auto GetScaleParams = [](double scale, double& m, int32_t& n)
+ // Can't be greater than 1.0.
+ if (!(shiftedM <= (int64_t(1) << 31)))
{
- m = 0;
- n = 0;
+ throw armnn::Exception("Shifted mantissa exceeds 32 signed bits");
+ }
- double lastErr = 1e06;
+ if (shiftedM == (int64_t(1) << 31))
+ {
+ shiftedM /= 2;
+ shift++;
+ }
- const int32_t numExponents = 62;
- const double start = 1.0;
- const double end = 2.0;
+ // TOSA expects right shift to be positive, and embed (1 << 31) into right
+ // shift bits.
+ shift = (-shift) + 31;
- // Slow iterative approach but running in Reference only
- for (int32_t i = 0; i < numExponents; ++i)
- {
- double exp = 1.0 / (1 << i);
- double currentM = scale / exp; // Find current m given value = currentM * exp
- if ((currentM >= start) && (currentM < end))
- {
- double value = currentM * exp;
- double err = std::abs(scale - value);
- if (err < lastErr)
- {
- // Take the m, n that minimize the error
- n = i;
- m = currentM;
- lastErr = err;
- }
- }
- }
- };
+ if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+ }
+
+ multiplier = static_cast<int32_t>(shiftedM);
+
+ // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+ // The limit of 62 on shift allows the shift to be decomposed as
+ // two right shifts of 31.
+ if (shift > 62)
+ {
+ // Shifting the multiplier by more than 31-bits is unnecessary.
+ multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+ shift = 62;
+ }
+}
+
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values where
+/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
+/// multiplier = mantissa*2^shift for 16-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale16(double scale,
+ int32_t &multiplier,
+ int32_t &shift)
+{
+ const double mantissa = std::frexp(scale, &shift);
+ auto shiftedM = std::round(mantissa * (int64_t(1) << 15));
- auto GetMultiplierShiftByScale = [GetScaleParams](bool scale32, double scale, int32_t& multiplier, int32_t& shift)
+ // Can't be greater than 1.0.
+ if (!(shiftedM <= (int64_t(1) << 15)))
{
- double m = 0;
- int32_t n = 0;
+ throw armnn::Exception("Shifted mantissa exceeds 16 signed bits");
+ }
+
+ if (shiftedM == (int64_t(1) << 15))
+ {
+ shiftedM /= 2;
+ shift++;
+ }
+
+ // TOSA expects right shift to be positive and embed (1 << 15) into right
+ // shift bits.
+ shift = (-shift) + 15;
+
+ if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+ }
- GetScaleParams(scale, m, n);
+ multiplier = static_cast<int32_t>(shiftedM);
- multiplier = (scale32) ? (1 << 30) * static_cast<int32_t>(m) : (1 << 14) * static_cast<int32_t>(m);
- shift = (scale32) ? (30 + n) : (14 + n);
- };
+ // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+ // The limit of 62 on shift allows the shift to be decomposed as
+ // two right shifts of 31.
+ if (shift > 62)
+ {
+ // Shifting the multiplier by more than 31-bits is unnecessary.
+ multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+ shift = 62;
+ }
+}
+inline void CreateRescaleTosaOperator(const std::string& inputName,
+ const std::string& outputName,
+ double scale,
+ int32_t input_zp,
+ int32_t output_zp,
+ bool double_round,
+ bool scale32,
+ TosaSerializationOperator** op)
+{
int32_t multiplier;
int32_t shift;
- GetMultiplierShiftByScale(scale32, scale, multiplier, shift);
- CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multiplier, shift,
- input_zp, output_zp, double_round, scale32, op, tensor);
+
+ if (scale32)
+ {
+ ComputeMultiplierAndShiftTosaScale32(scale, multiplier, shift);
+ }
+ else
+ {
+ ComputeMultiplierAndShiftTosaScale16(scale, multiplier, shift);
+ }
+
+ CreateRescaleTosaOperator(inputName, outputName, multiplier, shift,
+ input_zp, output_zp, double_round, scale32, false, op);
+}
+
+inline void CreateRescaleTosaOperatorPerChannel(const std::string& inputName,
+ const std::string& outputName,
+ int32_t input_zp,
+ int32_t output_zp,
+ bool double_round,
+ bool scale32,
+ double input_scale,
+ double output_scale,
+ const std::vector<float>& weight_scales,
+ TosaSerializationOperator** op)
+{
+ std::vector<int32_t> op_tensor_multipliers;
+ std::vector<int32_t> op_tensor_shifts;
+ op_tensor_multipliers.reserve(weight_scales.size());
+ op_tensor_shifts.reserve(weight_scales.size());
+
+ for (const float& weight_scale : weight_scales)
+ {
+ double op_tensor_scale = (input_scale * weight_scale) / output_scale;
+ int32_t multiplier;
+ int32_t shift;
+
+ if (scale32)
+ {
+ ComputeMultiplierAndShiftTosaScale32(op_tensor_scale, multiplier, shift);
+ }
+ else
+ {
+ ComputeMultiplierAndShiftTosaScale16(op_tensor_scale, multiplier, shift);
+ }
+
+ op_tensor_multipliers.push_back(multiplier);
+ op_tensor_shifts.push_back(shift);
+ }
+
+ CreateRescaleTosaOperator(inputName, outputName, op_tensor_multipliers, op_tensor_shifts,
+ input_zp, output_zp, double_round, scale32, true, op);
}
inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
const std::string& outputName,
- DType output_type,
- const std::vector<int32_t>& shape,
- double output_scale,
- int32_t output_zp,
- TosaSerializationOperator** op,
- TosaSerializationTensor** tensor)
+ double output_scale,
+ int32_t output_zp,
+ TosaSerializationOperator** op)
{
- CreateRescaleTosaOperator(inputName, outputName, output_type, shape,
- output_scale, 0, output_zp, true, true, op, tensor);
+ CreateRescaleTosaOperator(inputName, outputName, output_scale,
+ 0, output_zp, true, true, op);
}
diff --git a/src/backends/tosaCommon/operatorMappings/TransposeConv2dOperator.cpp b/src/backends/tosaCommon/operatorMappings/TransposeConv2dOperator.cpp
index 8c2ae9f2b5..81d58e04fe 100644
--- a/src/backends/tosaCommon/operatorMappings/TransposeConv2dOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/TransposeConv2dOperator.cpp
@@ -12,7 +12,7 @@ TosaSerializationBasicBlock* ConvertTransposeConv2dToTosaOperator(const Layer* l
const std::vector<const TensorInfo*>& outputs,
const TransposeConvolution2dDescriptor* descriptor)
{
- std::string input0Name = std::string("input0_");
+ std::string input0Name = std::string("input_");
std::string input1Name = std::string("constant_") + GetUniqueTosaMappingID();
std::string input2Name = std::string("constant_") + GetUniqueTosaMappingID();
std::string outputName = std::string("output0_");
@@ -22,12 +22,8 @@ TosaSerializationBasicBlock* ConvertTransposeConv2dToTosaOperator(const Layer* l
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedInputLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- input0Name = GenerateUniqueName(connectedInputLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ input0Name = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
std::vector<TosaSerializationTensor*> tensors;
@@ -37,7 +33,7 @@ TosaSerializationBasicBlock* ConvertTransposeConv2dToTosaOperator(const Layer* l
// Only add tensor if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensors.
- if(input0Name.find("input0_") != std::string::npos)
+ if(input0Name.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
diff --git a/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp
index ccc77741c9..229a1b2421 100644
--- a/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -10,7 +10,7 @@ TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer,
const std::vector<const TensorInfo*>& outputs,
const TransposeDescriptor* transposeDescriptor)
{
- std::string input0Name = std::string("input0_");
+ std::string input0Name = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_TRANSPOSE_block_") + GetUniqueTosaMappingID();
@@ -18,12 +18,8 @@ TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slot and determine unique tensor name.
- Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- input0Name = GenerateUniqueName(connectedLayer0, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ input0Name = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
std::vector<int32_t> mappings(transposeDescriptor->m_DimMappings.begin(),
@@ -42,7 +38,7 @@ TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer,
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- if(input0Name.find("input0_") != std::string::npos)
+ if(input0Name.find("input_") != std::string::npos)
{
std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
diff --git a/src/backends/tosaCommon/test/AvgPool2DIgnoreValueChecker.hpp b/src/backends/tosaCommon/test/AvgPool2DIgnoreValueChecker.hpp
index 6f57c4a61e..4c38d6b1e7 100644
--- a/src/backends/tosaCommon/test/AvgPool2DIgnoreValueChecker.hpp
+++ b/src/backends/tosaCommon/test/AvgPool2DIgnoreValueChecker.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022,2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -39,7 +39,7 @@ void VerifyAvgPool2DIgnoreValue(TosaSerializationBasicBlock* basicBlock,
std::basic_string<char> blockInputName = basicBlock->GetInputs()[i];
std::basic_string<char> operatorInputName = padOp->GetInputTensorNames()[i];
- std::string opStr = "input" + std::to_string(i) + "_";
+ std::string opStr = "input_";
CHECK(blockInputName == operatorInputName);
CHECK(basicBlock->GetTensorByName(blockInputName));
diff --git a/src/backends/tosaCommon/test/OneToManyMappingTests.cpp b/src/backends/tosaCommon/test/OneToManyMappingTests.cpp
index cc129f3e14..6ad6ea8d05 100644
--- a/src/backends/tosaCommon/test/OneToManyMappingTests.cpp
+++ b/src/backends/tosaCommon/test/OneToManyMappingTests.cpp
@@ -77,8 +77,7 @@ TEST_CASE("GetTosaMappingFromLayer_AvgPool2DIgnoreValueLayer")
input0->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
pool->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
- TosaSerializationBasicBlock* basicBlock =
- GetTosaMappingFromLayer(PolymorphicDowncast<Layer*>(pool));
+ TosaSerializationBasicBlock* basicBlock = GetTosaMappingFromLayer(PolymorphicDowncast<Layer*>(pool));
VerifyAvgPool2DIgnoreValue(basicBlock,
inputShape,
outputShape,
@@ -139,8 +138,11 @@ TEST_CASE("GetTosaMapping_SplitLayer")
armnn::TensorInfo inputTensorInfo({1, 18, 4, 4}, DataType::Float32);
armnn::TensorInfo outputTensorInfo({1, 6, 4, 4}, DataType::Float32);
- TosaSerializationBasicBlock* basicBlock =
- GetTosaMapping(nullptr, LayerType::Splitter, {&inputTensorInfo}, {&outputTensorInfo}, descriptor);
+ TosaSerializationBasicBlock* basicBlock = GetTosaMapping(nullptr,
+ LayerType::Splitter,
+ {&inputTensorInfo},
+ {&outputTensorInfo, &outputTensorInfo, &outputTensorInfo},
+ descriptor);
VerifySplit(basicBlock,
inShape,
@@ -200,17 +202,9 @@ TEST_CASE("GetTosaMapping_ActivationFloat32")
ActivationEndToEndTest<DataType::Float32>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 1.f, 0, 0.01f);
}
-TEST_CASE("UNSUPPORTED_GetTosaMapping_ActivationFloat16")
+TEST_CASE("GetTosaMapping_ActivationFloat16")
{
- try
- {
- ActivationEndToEndTest<DataType::Float16>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 1.f, 0, 0.01f);
- FAIL("An exception should have been thrown");
- }
- catch (armnn::Exception& e)
- {
- CHECK_EQ(std::string(e.what()), "Failed to assign a backend to each layer");
- }
+ ActivationEndToEndTest<DataType::Float16>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 1.f, 0, 0.01f);
}
TEST_CASE("GetTosaMapping_ActivationInt32")
diff --git a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp
index 267c9fb49d..8665aa9102 100644
--- a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp
+++ b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -786,7 +786,7 @@ TEST_CASE("GetTosaMapping_TransposeConv2dLayer")
CHECK(basicBlock->GetOperators().size() == 3);
CHECK(basicBlock->GetTensors().size() == 4);
- CHECK(basicBlock->GetInputs()[0].find("input0_") != std::string::npos);
+ CHECK(basicBlock->GetInputs()[0].find("input_") != std::string::npos);
CHECK(basicBlock->GetInputs()[1].find("constant_") != std::string::npos);
CHECK(basicBlock->GetInputs()[2].find("constant_") != std::string::npos);
CHECK(basicBlock->GetOutputs()[0].find("output0_") != std::string::npos);
@@ -848,7 +848,7 @@ TEST_CASE("GetTosaMappingFromLayer_TransposeConv2dLayer")
CHECK(basicBlock->GetOperators().size() == 3);
CHECK(basicBlock->GetTensors().size() == 4);
- CHECK(basicBlock->GetInputs()[0].find("input0_") != std::string::npos);
+ CHECK(basicBlock->GetInputs()[0].find("input_") != std::string::npos);
CHECK(basicBlock->GetInputs()[1].find("constant_") != std::string::npos);
CHECK(basicBlock->GetInputs()[2].find("constant_") != std::string::npos);
CHECK(basicBlock->GetOutputs()[0].find("output0_") != std::string::npos);
diff --git a/src/backends/tosaCommon/test/SplitChecker.hpp b/src/backends/tosaCommon/test/SplitChecker.hpp
index edef4a1cf9..4a4eeba016 100644
--- a/src/backends/tosaCommon/test/SplitChecker.hpp
+++ b/src/backends/tosaCommon/test/SplitChecker.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -38,7 +38,7 @@ void VerifySplit(TosaSerializationBasicBlock* splitBlock,
std::basic_string<char> blockInputName = splitBlock->GetInputs()[0];
std::basic_string<char> operatorInputName = sliceOp->GetInputTensorNames()[0];
- std::string opInputStr = "input" + std::to_string(0) + "_";
+ std::string opInputStr = "input_";
CHECK(blockInputName == operatorInputName);
CHECK(splitBlock->GetTensorByName(blockInputName));
diff --git a/src/backends/tosaCommon/test/TosaTestUtils.hpp b/src/backends/tosaCommon/test/TosaTestUtils.hpp
index 05dd164b50..a0eec74e12 100644
--- a/src/backends/tosaCommon/test/TosaTestUtils.hpp
+++ b/src/backends/tosaCommon/test/TosaTestUtils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -284,20 +284,20 @@ inline void AssertTosaOneToOneMappingBasicBlock(TosaSerializationBasicBlock* bas
CHECK(op->GetInputTensorNames().size() == numInputTensors);
CHECK(op->GetOutputTensorNames().size() == numOutputs);
- for (uint32_t i = 0; i < numInputs; i++)
+ for (uint32_t i = 0; i < numInputs; ++i)
{
std::basic_string<char> blockInputName = basicBlock->GetInputs()[i];
std::basic_string<char> operatorInputName = op->GetInputTensorNames()[i];
std::basic_string<char> tensorName = basicBlock->GetTensors()[i]->GetName();
- std::string opStr = "input" + std::to_string(i) + "_";
+ std::string opStr = "input_";
CHECK(blockInputName == operatorInputName);
CHECK(tensorName == operatorInputName);
CHECK(blockInputName.find(opStr) != std::string::npos);
}
- for (uint32_t i = 0; i < numOutputs; i++)
+ for (uint32_t i = 0; i < numOutputs; ++i)
{
std::basic_string<char> blockOutputName = basicBlock->GetOutputs()[i];
std::basic_string<char> operatorOutputName = op->GetOutputTensorNames()[i];
diff --git a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
index 68531f89ff..a5f7ad88a8 100644
--- a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
@@ -5,6 +5,7 @@
#include "backendsCommon/test/EndToEndTestImpl.hpp"
+#include "backendsCommon/test/ActivationEndToEndTestImpl.hpp"
#include "backendsCommon/test/AdditionEndToEndTestImpl.hpp"
#include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp"
#include "backendsCommon/test/ConcatEndToEndTestImpl.hpp"
@@ -27,6 +28,25 @@ TEST_SUITE("TosaRefEndToEnd")
{
static std::vector<BackendId> tosaDefaultBackends = { "TosaRef" };
+// Activation
+//LeakyRelu
+TEST_CASE("TosaRefLeakyReluActivationFloat32")
+{
+ ActivationEndToEndTest<DataType::Float32>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 1.f, 0, 0.01f);
+}
+TEST_CASE("TosaRefLeakyReluActivationFloat16")
+{
+ ActivationEndToEndTest<DataType::Float16>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 0.3f, 5, 0.01f);
+}
+TEST_CASE("TosaRefLeakyReluActivationInt8")
+{
+ ActivationEndToEndTest<DataType::QAsymmS8>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 0.6f, 7, 0.01f);
+}
+TEST_CASE("TosaRefLeakyReluActivationInt16")
+{
+ ActivationEndToEndTest<DataType::QSymmS16>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 0.15f, 0, 0.01f);
+}
+
// Addition
TEST_CASE("TosaRefAdditionEndtoEndTestFloat32")
{
@@ -95,6 +115,20 @@ TEST_CASE("TosaRefConv2dWithoutBiasEndtoEndTestFloat32")
Convolution2dEndToEnd<armnn::DataType::Float32>(tosaDefaultBackends, armnn::DataLayout::NHWC, false);
}
+TEST_CASE("TosaRefConv2dEndtoEndTestInt8")
+{
+ Convolution2dEndToEnd<armnn::DataType::QSymmS8,
+ armnn::DataType::QSymmS8,
+ armnn::DataType::Signed32>(tosaDefaultBackends, armnn::DataLayout::NHWC);
+}
+
+TEST_CASE("TosaRefConv2dWithoutBiasEndtoEndTestInt8")
+{
+ Convolution2dEndToEnd<armnn::DataType::QSymmS8,
+ armnn::DataType::QSymmS8,
+ armnn::DataType::Signed32>(tosaDefaultBackends, armnn::DataLayout::NHWC, false);
+}
+
// Maximum
TEST_CASE("TosaRefMaximumEndtoEndTestInt8")
{
diff --git a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
index 759b37fe93..28d7753973 100644
--- a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
@@ -523,7 +523,7 @@ TEST_CASE("IsLayerSupportedTosaReferenceSplit")
TosaRefLayerSupport supportChecker;
std::string reasonIfNotSupported;
auto supported = supportChecker.IsLayerSupported(LayerType::Splitter,
- {in, out},
+ {in, out, out, out},
descriptor,
EmptyOptional(),
EmptyOptional(),
@@ -547,7 +547,7 @@ TEST_CASE("IsLayerSupportedTosaReferenceSplitUnsupported")
TosaRefLayerSupport supportChecker;
std::string reasonIfNotSupported;
auto supported = supportChecker.IsLayerSupported(LayerType::Splitter,
- {in, out},
+ {in, out, out, out},
descriptor,
EmptyOptional(),
EmptyOptional(),
diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp
index 4518f1426f..2bbb9f9a8d 100644
--- a/tests/ExecuteNetwork/ArmNNExecutor.cpp
+++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp
@@ -7,26 +7,150 @@
#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
-#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>
-
+#include <armnn/IAsyncExecutionCallback.hpp>
+#include <armnnSerializer/ISerializer.hpp>
using namespace armnn;
using namespace std::chrono;
+/**
+ * Given a reference to an INetwork and a target directory, serialize the network to a file
+ * called "<timestamp>_network.armnn"
+ *
+ * @param network The network to serialize.
+ * @param dumpDir The target directory.
+ * @return the full path to the serialized file.
+ */
+std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir)
+{
+ if (dumpDir.empty())
+ {
+ throw InvalidArgumentException("An output directory must be specified.");
+ }
+ fs::path outputDirectory(dumpDir);
+ if (!exists(outputDirectory))
+ {
+ throw InvalidArgumentException(
+ fmt::format("The specified directory does not exist: {}", outputDirectory.c_str()));
+ }
+ auto serializer(armnnSerializer::ISerializer::Create());
+ // Serialize the Network
+ serializer->Serialize(network);
+
+ fs::path fileName;
+ fileName += dumpDir;
+ // used to get a timestamp to name diagnostic files (the ArmNN serialized graph
+ // and getSupportedOperations.txt files)
+ timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == 0)
+ {
+ std::stringstream ss;
+ ss << std::to_string(ts.tv_sec) << "_" << std::to_string(ts.tv_nsec) << "_network.armnn";
+ fileName += ss.str();
+ }
+ else
+ {
+ // This is incredibly unlikely but just in case.
+ throw RuntimeException("clock_gettime, CLOCK_MONOTONIC_RAW returned a non zero result.");
+ }
+
+ // Save serialized network to a file
+ std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
+ auto serialized = serializer->SaveSerializedToStream(serializedFile);
+ if (!serialized)
+ {
+ throw RuntimeException(fmt::format("An error occurred when serializing to file %s", fileName.c_str()));
+ }
+ serializedFile.flush();
+ serializedFile.close();
+ return fileName;
+}
+
+/**
+ * Given a reference to an optimized network and a target directory, serialize the network in .dot file format to
+ * a file called "<timestamp>_optimized_networkgraph.dot"
+ *
+ * @param optimizedNetwork The optimized network to serialize.
+ * @param dumpDir The target directory.
+ * @return the full path to the serialized file.
+ */
+std::string SerializeNetworkToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork, const std::string& dumpDir)
+{
+ if (dumpDir.empty())
+ {
+ throw InvalidArgumentException("An output directory must be specified.");
+ }
+ fs::path outputDirectory(dumpDir);
+ if (!exists(outputDirectory))
+ {
+ throw InvalidArgumentException(
+ fmt::format("The specified directory does not exist: {}", outputDirectory.c_str()));
+ }
+
+ fs::path fileName;
+ fileName += dumpDir;
+ // used to get a timestamp to name diagnostic files (the ArmNN serialized graph
+ // and getSupportedOperations.txt files)
+ timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == 0)
+ {
+ std::stringstream ss;
+ ss << std::to_string(ts.tv_sec) << "_" << std::to_string(ts.tv_nsec) << "_optimized_networkgraph.dot";
+ fileName += ss.str();
+ }
+ else
+ {
+ // This is incredibly unlikely but just in case.
+ throw RuntimeException("clock_gettime, CLOCK_MONOTONIC_RAW returned a non zero result.");
+ }
+
+ // Write the network graph to a dot file.
+ std::ofstream fileStream;
+ fileStream.open(fileName, std::ofstream::out | std::ofstream::trunc);
+ if (!fileStream.good())
+ {
+ throw RuntimeException(fmt::format("An error occurred when creating %s", fileName.c_str()));
+ }
+
+ if (optimizedNetwork.SerializeToDot(fileStream) != armnn::Status::Success)
+ {
+ throw RuntimeException(fmt::format("An error occurred when serializing to file %s", fileName.c_str()));
+ }
+ fileStream.flush();
+ fileStream.close();
+ return fileName;
+}
+
ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
-: m_Params(params)
+ : m_Params(params)
{
- runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
+ runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
// Create/Get the static ArmNN Runtime. Note that the m_Runtime will be shared by all ArmNNExecutor
// instances so the RuntimeOptions cannot be altered for different ArmNNExecutor instances.
m_Runtime = GetRuntime(runtimeOptions);
- auto parser = CreateParser();
+ auto parser = CreateParser();
auto network = parser->CreateNetwork(m_Params);
- auto optNet = OptimizeNetwork(network.get());
+ auto optNet = OptimizeNetwork(network.get());
+
+ // If the user has asked for detailed data, write out the .armnn and .dot files.
+ if (params.m_SerializeToArmNN)
+ {
+ // .armnn first.
+ // This could throw multiple exceptions if the directory cannot be created or the file cannot be written.
+ std::string targetDirectory(armnnUtils::Filesystem::CreateDirectory("/ArmNNSerializeNetwork"));
+ std::string fileName;
+ fileName = SerializeNetwork(*network, targetDirectory);
+ ARMNN_LOG(info) << "The pre-optimized network has been serialized to:" << fileName;
+ // and the .dot file.
+ // Most of the possible exceptions should have already occurred with the .armnn file.
+ fileName =
+ SerializeNetworkToDotFile(*optNet, targetDirectory);
+ ARMNN_LOG(info) << "The optimized network has been serialized to:" << fileName;
+ }
m_IOInfo = GetIOInfo(optNet.get());
@@ -176,6 +300,12 @@ void ArmNNExecutor::ExecuteAsync()
void ArmNNExecutor::ExecuteSync()
{
+ // If we've only been asked to serialize the networks, don't execute the inference.
+ if (m_Params.m_SerializeToArmNN)
+ {
+ ARMNN_LOG(info) << "serialize-to-armnn has been specified. No inference will be executed.";
+ return;
+ }
for (size_t x = 0; x < m_Params.m_Iterations; x++)
{
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index ffcb4f482c..c2bfb951d5 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022, 2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -67,6 +67,7 @@ struct ExecuteNetworkParams
std::string m_ComparisonFile;
std::vector<armnn::BackendId> m_ComparisonComputeDevices;
bool m_CompareWithTflite;
+ bool m_SerializeToArmNN;
// Ensures that the parameters for ExecuteNetwork fit together
void ValidateParams();
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index 5c1337f769..ba994b0da8 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -351,6 +351,12 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
"Perform an per byte root mean square error calculation of the output of the inference with"
" the tflite ref model.",
cxxopts::value<bool>(m_ExNetParams.m_CompareWithTflite)->default_value("false")
+ ->implicit_value("true"))
+
+ ("serialize-to-armnn",
+ "Serialize the loaded network to an .armnn file. This option will also serialize the optimized network"
+ " in dot format. This option only works with the TfLite parser. An inference will NOT be executed.",
+ cxxopts::value<bool>(m_ExNetParams.m_SerializeToArmNN)->default_value("false")
->implicit_value("true"));
m_CxxOptions.add_options("d) Optimization")