From ceb442825b8c19e2450fa7bd43341d571a9b2eeb Mon Sep 17 00:00:00 2001
From: John Mcloughlin
Date: Tue, 23 Apr 2024 16:47:04 +0100
Subject: IVGCVSW-8294 Fix quantized Conv2d TOSA mapping

 * TosaConv2d
 * TosaQuantization
 * TosaRescale

Signed-off-by: John Mcloughlin
Signed-off-by: Teresa Charlin
Change-Id: I6c7ceca1f7df62896b41a84e6a6448afd8c32b74
---
 .../test/Convolution2dEndToEndTestImpl.hpp         |  81 ++++----
 .../test/QuantizationEndToEndTestImpl.hpp          |  22 ++-
 src/backends/reference/test/RefEndToEndTests.cpp   |  13 ++
 .../tosaCommon/operatorMappings/Conv2dOperator.cpp |  73 ++++++--
 .../operatorMappings/QuantizeOperator.cpp          | 186 ++++++++++--------
 .../operatorMappings/TosaOperatorUtils.hpp         |  27 +++
 .../operatorMappings/TosaRescaleOperatorUtils.hpp  | 211 ++++++++++++++++-----
 .../tosaReference/test/TosaRefEndToEndTests.cpp    |  14 ++
 8 files changed, 449 insertions(+), 178 deletions(-)

diff --git a/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
index bc9a94289b..f53f97ae88 100644
--- a/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022, 2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -49,46 +49,51 @@ armnn::INetworkPtr CreateConstConvolution2dNetwork(const armnn::Convolution2dDes
     return network;
 }
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<armnn::DataType ArmnnIType, armnn::DataType ArmnnWType = ArmnnIType,
+         armnn::DataType ArmnnBType = ArmnnIType, armnn::DataType ArmnnOType = ArmnnIType>
 void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
                            armnn::DataLayout dataLayout,
                            bool biasEnabled = true)
 {
     using namespace armnn;
+    using IT = ResolveType<ArmnnIType>;
+    using WT = ResolveType<ArmnnWType>;
+    using BT = ResolveType<ArmnnBType>;
+    using OT = ResolveType<ArmnnOType>;
 
-    const float qScale = IsQuantizedType<T>() ? 0.25f : 1.0f;
-    const int32_t qOffset = IsQuantizedType<T>() ? 50 : 0;
+    const float qScale = 1.0f;
+    const int32_t qOffset = IsQuantizedType<IT>() ? 10 : 0;    // offset must be zero for non-quantized types
 
-    TensorInfo inputInfo({ 1, 5, 5, 1 }, ArmnnType, qScale, qOffset, true);
-    TensorInfo outputInfo({ 1, 3, 3, 1 }, ArmnnType, qScale, qOffset);
-    TensorInfo weightsInfo({ 1, 3, 3, 1 }, ArmnnType, qScale, qOffset, true);
-    TensorInfo biasesInfo({ 1 }, ArmnnType, qScale * qScale, 0, true);
+    TensorInfo inputInfo(  { 1, 5, 5, 1 }, ArmnnIType, qScale, qOffset, true);
+    TensorInfo weightsInfo({ 1, 3, 3, 1 }, ArmnnWType, qScale, qOffset, true);
+    TensorInfo biasesInfo( { 1 },          ArmnnBType, qScale * qScale, 0, true);
+    TensorInfo outputInfo( { 1, 3, 3, 1 }, ArmnnOType, qScale, qOffset);
 
     std::vector<float> inputData =
-    {
-        1.0f, 5.0f, 2.0f, 3.0f, 5.0f,
-        8.0f, 7.0f, 3.0f, 6.0f, 3.0f,
-        3.0f, 3.0f, 9.0f, 1.0f, 9.0f,
-        4.0f, 1.0f, 8.0f, 1.0f, 3.0f,
-        6.0f, 8.0f, 1.0f, 9.0f, 2.0f
-    };
+        {
+            1, 5, 2, 3, 5,
+            8, 7, 3, 6, 3,
+            3, 3, 9, 1, 9,
+            4, 1, 8, 1, 3,
+            6, 8, 1, 9, 2
+        };
 
     std::vector<float> weightsData =
-    {
-        4.0f, 5.0f, 6.0f,
-        0.0f, 0.0f, 0.0f,
-        3.0f, 2.0f, 1.0f
-    };
+        {
+            4, 5, 6,
+            0, 0, 0,
+            3, 2, 1
+        };
 
-    std::vector<float> biasesData = { 1.0f };
+    std::vector<float> biasesData = { 1 };
+    float bias = biasEnabled ? biasesData[0] : 0;
 
-    float bias = biasEnabled ? biasesData[0] : 0.0f;
     std::vector<float> expectedOutputData =
-    {
-        65.0f + bias, 76.0f + bias, 91.0f + bias,
-        107.0f + bias, 99.0f + bias, 89.0f + bias,
-        116.0f + bias, 98.0f + bias, 118.0f + bias,
-    };
+        {
+            65 + bias,  76 + bias,  91 + bias,
+            107 + bias, 99 + bias,  89 + bias,
+            116 + bias, 98 + bias,  118 + bias
+        };
 
     Convolution2dDescriptor descriptor;
     descriptor.m_PadLeft = 0;
@@ -102,16 +107,16 @@ void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
 
     if (dataLayout == DataLayout::NCHW)
     {
-        PermuteTensorNhwcToNchw(inputInfo, inputData);
+        PermuteTensorNhwcToNchw<float>(inputInfo, inputData);
         PermuteTensorNhwcToNchw(weightsInfo, weightsData);
-        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
+        PermuteTensorNhwcToNchw<float>(outputInfo, expectedOutputData);
     }
 
-    // Quantize data
-    std::vector<T> qInputData = armnnUtils::QuantizedVector<T>(inputData, qScale, qOffset);
-    std::vector<T> qWeightsData = armnnUtils::QuantizedVector<T>(weightsData, qScale, qOffset);
-    std::vector<T> qExpectedOutputData = armnnUtils::QuantizedVector<T>(expectedOutputData, qScale, qOffset);
-    std::vector<T> qBiasesData = armnnUtils::QuantizedVector<T>(biasesData, qScale * qScale, 0);
+    // Convert data
+    std::vector<IT> qInputData          = armnnUtils::QuantizedVector<IT>(inputData, qScale, qOffset);
+    std::vector<WT> qWeightsData        = armnnUtils::QuantizedVector<WT>(weightsData, qScale, qOffset);
+    std::vector<BT> qBiasesData         = armnnUtils::QuantizedVector<BT>(biasesData, qScale * qScale, 0);
+    std::vector<OT> qExpectedOutputData = armnnUtils::QuantizedVector<OT>(expectedOutputData, qScale, qOffset);
 
     ConstTensor weights(weightsInfo, qWeightsData);
     ConstTensor biases(biasesInfo, qBiasesData);
@@ -125,10 +130,10 @@ void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
                                                           biases,
                                                           biasEnabled);
 
-    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network),
-                                                {{ 0, qInputData }},
-                                                {{ 0, qExpectedOutputData }},
-                                                backends);
+    EndToEndLayerTestImpl<ArmnnIType, ArmnnOType>(std::move(network),
+                                                  {{ 0, qInputData }},
+                                                  {{ 0, qExpectedOutputData }},
+                                                  backends);
 }
 
 } // anonymous namespace
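The expected outputs above can be checked by hand: each value is a 3x3 valid convolution (stride 1) of the 5x5 input with the weights {4,5,6; 0,0,0; 3,2,1}, plus the bias. A standalone sketch, not part of the patch, that recomputes the nine accumulators:

    // Standalone check of the Convolution2d expected outputs (valid padding, stride 1).
    // Plain C++, independent of Arm NN; data copied from the test above.
    #include <cstdio>

    int main()
    {
        const int in[5][5] = { { 1, 5, 2, 3, 5 },
                               { 8, 7, 3, 6, 3 },
                               { 3, 3, 9, 1, 9 },
                               { 4, 1, 8, 1, 3 },
                               { 6, 8, 1, 9, 2 } };
        const int w[3][3] = { { 4, 5, 6 },
                              { 0, 0, 0 },
                              { 3, 2, 1 } };
        const int bias = 1; // biasesData[0] when biasEnabled

        for (int oy = 0; oy < 3; ++oy)
        {
            for (int ox = 0; ox < 3; ++ox)
            {
                int acc = bias;
                for (int ky = 0; ky < 3; ++ky)
                {
                    for (int kx = 0; kx < 3; ++kx)
                    {
                        acc += in[oy + ky][ox + kx] * w[ky][kx];
                    }
                }
                std::printf("%4d", acc); // 65..118 plus bias, matching expectedOutputData
            }
            std::printf("\n");
        }
    }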
diff --git a/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
index f5c2eea601..3039b9b5a3 100644
--- a/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -105,4 +105,24 @@ void QuantizationEndToEndFloat16(const std::vector<armnn::BackendId>& backends)
                                                                              qOffset);
 };
 
+inline void QuantizationEndToEndInt8(const std::vector<armnn::BackendId>& backends)
+{
+    using namespace armnn;
+
+    const TensorShape tensorShape({ 1, 1, 1, 5 });
+
+    std::vector<float> inputData = { 113, 16, 13, 101, 13 };
+    std::vector<int8_t> expectedOutputData = { 127, 45, 41, 127, 41 };
+
+    float qScale = 0.75f;
+    int32_t qOffset = 24;
+
+    QuantizeEndToEndLayerTestImpl<DataType::Float32, DataType::QAsymmS8>(backends,
+                                                                         tensorShape,
+                                                                         inputData,
+                                                                         expectedOutputData,
+                                                                         qScale,
+                                                                         qOffset);
+};
+
 }
\ No newline at end of file
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 73786b5ccd..68b7fbff90 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -626,6 +626,13 @@ TEST_CASE("RefConvolution2dFloat16Test")
     Convolution2dEndToEnd<armnn::DataType::Float16>(defaultBackends, armnn::DataLayout::NHWC);
 }
 
+TEST_CASE("RefConvolution2dInt8Test")
+{
+    Convolution2dEndToEnd<armnn::DataType::QSymmS8,
+                          armnn::DataType::QSymmS8,
+                          armnn::DataType::Signed32,
+                          armnn::DataType::QSymmS8>(defaultBackends, armnn::DataLayout::NHWC);
+}
+
 TEST_CASE("RefConvolution3dFloat32Test")
 {
     Convolution3dEndToEnd<armnn::DataType::Float32>(defaultBackends,
@@ -818,6 +825,12 @@ TEST_CASE("DepthToSpaceEndToEndNhwcInt16")
     DepthToSpaceEndToEnd<armnn::DataType::QSymmS16>(defaultBackends, armnn::DataLayout::NHWC);
 }
 
+// Quantization
+TEST_CASE("RefQuantizeInt8")
+{
+    QuantizationEndToEndInt8(defaultBackends);
+}
+
 // Dequantize
 TEST_CASE("DequantizeEndToEndSimpleTest")
 {
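For the data in QuantizationEndToEndInt8 and RefQuantizeInt8 above, the reference outputs follow from q = round(x / qScale) + qOffset clamped to the int8 range: 113 / 0.75 ≈ 150.7, which saturates to 127, while 16 / 0.75 ≈ 21.3 gives 21 + 24 = 45. A standalone check, not part of the patch, assuming round-to-nearest:

    // Recomputes the expectedOutputData of QuantizationEndToEndInt8 by hand.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const std::vector<float> input = { 113, 16, 13, 101, 13 };
        const float   qScale  = 0.75f;
        const int32_t qOffset = 24;

        for (float x : input)
        {
            int32_t q = static_cast<int32_t>(std::round(x / qScale)) + qOffset;
            q = std::min(127, std::max(-128, q)); // saturate to int8
            std::printf("%d ", q); // prints: 127 45 41 127 41
        }
        std::printf("\n");
    }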
diff --git a/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp b/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
index c65f1891da..1c8682b1ab 100644
--- a/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
@@ -4,6 +4,8 @@
 //
 
 #include "Conv2dOperator.hpp"
+#include "TosaRescaleOperatorUtils.hpp"
+#include
 
 TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
                                                          const std::vector<const TensorInfo*>& inputs,
                                                          const std::vector<const TensorInfo*>& outputs,
                                                          const Convolution2dDescriptor* conv2dDescriptor)
@@ -14,6 +16,9 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
     std::string outputName = std::string("output0_");
     std::string blockName = std::string("Op_CONV2D_block_") + GetUniqueTosaMappingID();
 
+    DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
+    DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
+
     // Set input names for validation purposes only.
     if(layer == nullptr)
     {
@@ -52,7 +57,6 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
     if(inputNames[0].find("input0_") != std::string::npos)
     {
         std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
-        DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
         tensors.push_back(new TosaSerializationTensor(inputNames[0], inputShape0, inputDType0, {}));
     }
 
@@ -87,23 +91,32 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
         // The size of the bias must match the channels dimension, so get the correct index.
         unsigned int index = (conv2dDescriptor->m_DataLayout == DataLayout::NHWC) ? 3 : 1;
 
-        std::vector<uint8_t> uint8Data;
-        std::vector<float> data(outputs[0]->GetShape()[index], 0.0f);
+        const DType dType = (inputDType0 == DType_INT8) ? DType_INT32 : outputDType0;
+        std::vector<float> data(outputs[0]->GetShape()[index], 0);
 
+        std::vector<uint8_t> uint8Data;
         TosaSerializationHandler::ConvertF32toU8(data, uint8Data);
 
         tensors.push_back(new TosaSerializationTensor(constantName,
                                                       {static_cast<int32_t>(outputs[0]->GetShape()[index])},
-                                                      DType_FP32,
+                                                      dType,
                                                       uint8Data));
 
         inputNames.emplace_back(constantName);
     }
 
     // Setup Output Tensor
-    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
-    DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
-
-    tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+    std::vector<int32_t> outputShape0 = {GetTosaTensorShape(outputs[0]->GetShape())};
+    std::string outputConv2dName;
+    bool isInputInt8 = (inputDType0 == DType_INT8);
+    if (isInputInt8)
+    {
+        outputConv2dName = std::string("intermediate0_") + GetUniqueTosaMappingID();
+        tensors.push_back(new TosaSerializationTensor(outputConv2dName, outputShape0, DType_INT32, {}));
+    }
+    else
+    {
+        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+    }
 
     // Set up CONV2D operator
     std::vector<int> pad = {static_cast<int>(conv2dDescriptor->m_PadTop),
                             static_cast<int>(conv2dDescriptor->m_PadBottom),
                             static_cast<int>(conv2dDescriptor->m_PadLeft),
                             static_cast<int>(conv2dDescriptor->m_PadRight)};
     std::vector<int> stride = {static_cast<int>(conv2dDescriptor->m_StrideY),
                                static_cast<int>(conv2dDescriptor->m_StrideX)};
     std::vector<int> dilation = {static_cast<int>(conv2dDescriptor->m_DilationY),
                                  static_cast<int>(conv2dDescriptor->m_DilationX)};
-    TosaConvAttribute attribute(pad, stride, dilation, 0, 0, false); // input_zp, weight_zp, local_bound
+    TosaConvAttribute attribute(pad, stride, dilation,
+                                inputs[0]->GetQuantizationOffset(), // input_zp
+                                inputs[1]->GetQuantizationOffset(), // weight_zp
+                                false);                             // local_bound
 
-    auto* op = new TosaSerializationOperator(Op_CONV2D,
-                                             Attribute_ConvAttribute,
-                                             &attribute,
-                                             inputNames,
-                                             {outputName});
-    operators.push_back(op);
+    std::string& convOutStr = isInputInt8 ? outputConv2dName : outputName;
+    auto* conv2d_op = new TosaSerializationOperator(Op_CONV2D,
+                                                    Attribute_ConvAttribute,
+                                                    &attribute,
+                                                    inputNames,
+                                                    {convOutStr});
+    operators.push_back(conv2d_op);
+
+    if (isInputInt8)
+    {
+        int32_t output_zp = outputs[0]->GetQuantizationOffset();
+        double output_scale = outputs[0]->GetQuantizationScales()[0];
+        double input_scale = inputs[0]->GetQuantizationScales()[0];
+        const std::vector<float>& weight_scales = inputs[1]->GetQuantizationScales();
+
+        TosaSerializationOperator* rescaleOp = nullptr;
+        TosaSerializationTensor* rescaleTensor = nullptr;
+        CreateRescaleTosaOperatorPerChannel(outputConv2dName,
+                                            outputName,
+                                            DType_INT8,
+                                            outputShape0,
+                                            0,
+                                            output_zp,
+                                            true,
+                                            true,
+                                            input_scale,
+                                            output_scale,
+                                            weight_scales,
+                                            &rescaleOp,
+                                            &rescaleTensor);
+        operators.push_back(rescaleOp);
+        tensors.push_back(rescaleTensor);
+    }
 
     // operatorInputNames/operatorOutputNames ends up being the same as
     // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
     return new TosaSerializationBasicBlock(blockName, // name
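The mapping above lowers quantized Conv2d to two TOSA operators: CONV2D consumes int8 input and weights and accumulates into an int32 intermediate (the bias constant is also int32), then RESCALE applies input_scale * weight_scale / output_scale per output channel and re-centres on the output zero point. A sketch of the runtime arithmetic for a single accumulator, not part of the patch, using the quantization parameters of the Convolution2dEndToEnd test above (scale 1.0, offset 10):

    // What the CONV2D + RESCALE pair computes for one output element.
    // Illustrative only; these are not Arm NN or TOSA serialization API calls.
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const double  inputScale  = 1.0;
        const double  weightScale = 1.0;
        const double  outputScale = 1.0;
        const int32_t outputZp    = 10;

        // CONV2D: int8 data (zero points handled via the input_zp/weight_zp
        // attributes) accumulates into int32; 65 is the first accumulator of
        // the test data, +1 for the bias.
        const int32_t acc = 65 + 1;

        // RESCALE: scale the accumulator, add the output zero point, saturate.
        const double scale = (inputScale * weightScale) / outputScale;
        int32_t q = static_cast<int32_t>(std::lround(acc * scale)) + outputZp;
        q = q < -128 ? -128 : (q > 127 ? 127 : q);

        std::printf("%d\n", q); // 76
    }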
diff --git a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
index 1107add6e9..1a104d8423 100644
--- a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 // Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
@@ -8,6 +8,8 @@
 
 #include "QuantizeOperator.hpp"
 
+#include "TosaRescaleOperatorUtils.hpp"
+
 // This function is paraphrased from:
 // tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
 TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
@@ -20,10 +22,6 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
                           "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
 
     std::string inputName = std::string("input0_");
-    std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();
-    std::string outputNameScale = std::string("intermediate1_") + GetUniqueTosaMappingID();
-    std::string outputNameMul = std::string("intermediate2_") + GetUniqueTosaMappingID();
-    std::string outputNameAdd = std::string("intermediate3_") + GetUniqueTosaMappingID();
     std::string outputName = std::string("output0_");
     std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
 
@@ -55,85 +53,121 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
 
     std::vector<TosaSerializationTensor*> tensors;
 
+    std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
+    DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
+    bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
+
     // Only add input tensors if connected layer is an input layer.
     // As intermediate or constant tensors will be created separately.
     // There also can't be duplicate tensor.
-    std::vector<int32_t> inputShape0;
-    DType inputDType0 = DType::DType_UNKNOWN;
     if(inputName.find("input0_") != std::string::npos)
     {
-        inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
-        inputDType0 = ArmNNToDType(inputInfo.GetDataType());
-
-        ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,
-                                             "ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );
-
         tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
     }
 
     std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
     DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
 
-    // quantize:
-    // const_zeroPoint = constant(zeroPoint)
-    // const_scale = constant(scale)
-    // out_mul = mul(input, const_scale)
-    // out_add = add(out_mul, const_zeroPoint)
-    // output = cast(out_add)
-
-    // const_zeroPoint
-    TosaSerializationOperator* zeroPointOp = nullptr;
-    TosaSerializationTensor* zeroPointTensor = nullptr;
-    CreateConstTosaOperator<float>(outputNameZeroPoint,
-                                   zeroPoint,
-                                   inputDType0,
-                                   inputShape0,
-                                   zeroPointOp,
-                                   zeroPointTensor);
-    tensors.push_back(zeroPointTensor);
-
-    // const_scale
-    TosaSerializationOperator *scaleOp = nullptr;
-    TosaSerializationTensor* scaleTensor = nullptr;
-    CreateConstTosaOperator<float>(outputNameScale,
-                                   scale,
-                                   inputDType0,
-                                   inputShape0,
-                                   scaleOp,
-                                   scaleTensor);
-    tensors.push_back(scaleTensor);
-
-    // mul
-    int32_t shift = 0;
-    TosaMulAttribute mulAttribute(shift);
-    TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
-                                                                     Attribute_MulAttribute,
-                                                                     &mulAttribute,
-                                                                     {inputName, outputNameScale},
-                                                                     {outputNameMul});
-    tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
-
-    // add
-    TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
-                                                                     Attribute_NONE,
-                                                                     nullptr,
-                                                                     {outputNameMul, outputNameZeroPoint},
-                                                                     {outputNameAdd});
-    tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
-
-    // cast
-    TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
-                                                                      Attribute_NONE,
-                                                                      nullptr,
-                                                                      {outputNameAdd},
-                                                                      {outputName});
-
-    tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
-
-    // operatorInputNames/operatorOutputNames ends up being the same as
-    // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
-    return new TosaSerializationBasicBlock(blockName, // name
-                                           mainName, // region name
-                                           {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
-                                           tensors, // tensors
-                                           {inputName}, // inputs
-                                           {outputName}); // outputs
-}
+    if (isFloatInput)
+    {
+        // quantize:
+        // const_zeroPoint = constant(zeroPoint)
+        // const_scale = constant(scale)
+        // out_mul = mul(input, const_scale)
+        // out_add = add(out_mul, const_zeroPoint)
+        // output = cast(out_add)
+
+        std::string outputNameScale = std::string("input1_") + GetUniqueTosaMappingID();
+        std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
+        std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
+        std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
+
+        // const_zeroPoint
+        TosaSerializationOperator* zeroPointOp = nullptr;
+        TosaSerializationTensor* zeroPointTensor = nullptr;
+        CreateConstTosaOperator<float>(outputNameZeroPoint,
+                                       zeroPoint,
+                                       inputDType0,
+                                       inputShape0,
+                                       zeroPointOp,
+                                       zeroPointTensor);
+        tensors.push_back(zeroPointTensor);
+
+        // const_scale
+        TosaSerializationOperator *scaleOp = nullptr;
+        TosaSerializationTensor* scaleTensor = nullptr;
+        CreateConstTosaOperator<float>(outputNameScale,
+                                       scale,
+                                       inputDType0,
+                                       inputShape0,
+                                       scaleOp,
+                                       scaleTensor);
+        tensors.push_back(scaleTensor);
+
+        // mul
+        int32_t shift = 0;
+        TosaMulAttribute mulAttribute(shift);
+        TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
+                                                                         Attribute_MulAttribute,
+                                                                         &mulAttribute,
+                                                                         {inputName, outputNameScale},
+                                                                         {outputNameMul});
+        tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
+
+        // add
+        TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
+                                                                         Attribute_NONE,
+                                                                         nullptr,
+                                                                         {outputNameMul, outputNameZeroPoint},
+                                                                         {outputNameAdd});
+        tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
+
+        // cast
+        TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
+                                                                          Attribute_NONE,
+                                                                          nullptr,
+                                                                          {outputNameAdd},
+                                                                          {outputName});
+
+        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+
+        // operatorInputNames/operatorOutputNames ends up being the same as
+        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+        return new TosaSerializationBasicBlock(blockName, // name
+                                               mainName, // region name
+                                               {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
+                                               tensors, // tensors
+                                               {inputName}, // inputs
+                                               {outputName}); // outputs
+    }
+    else
+    {
+        double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
+        int32_t input_zp = inputs[0]->GetQuantizationOffset();
+        int32_t output_zp = outputs[0]->GetQuantizationOffset();
+
+        TosaSerializationOperator* rescaleOp = nullptr;
+        TosaSerializationTensor* rescaleTensor = nullptr;
+        CreateRescaleTosaOperator(inputName,
+                                  outputName,
+                                  outputDType0,
+                                  inputShape0,
+                                  scale_alpha,
+                                  input_zp,
+                                  output_zp,
+                                  true,
+                                  true,
+                                  &rescaleOp,
+                                  &rescaleTensor);
+        tensors.push_back(rescaleTensor);
+
+        // operatorInputNames/operatorOutputNames ends up being the same as
+        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+        return new TosaSerializationBasicBlock(blockName, // name
+                                               mainName, // region name
+                                               {rescaleOp}, // operators
+                                               tensors, // tensors
+                                               {inputName}, // inputs
+                                               {outputName}); // outputs
+    }
+}
\ No newline at end of file
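With a quantized input, the branch above replaces the mul/add/cast chain by a single TOSA RESCALE, i.e. an integer requantization q_out = round((q_in - input_zp) * input_scale / output_scale) + output_zp, where scale_alpha carries the ratio of scales. A standalone sketch with illustrative parameter values (assumptions, not taken from the patch):

    // Integer requantization as performed by the RESCALE lowering above.
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    int main()
    {
        const double  inputScale  = 0.5;  // assumed example values
        const double  outputScale = 0.25;
        const int32_t inputZp     = 0;
        const int32_t outputZp    = 10;

        const double scaleAlpha = inputScale / outputScale; // scale_alpha above

        for (int32_t qIn : { -20, 0, 7, 50 })
        {
            int32_t qOut = static_cast<int32_t>(std::lround((qIn - inputZp) * scaleAlpha)) + outputZp;
            qOut = qOut < -128 ? -128 : (qOut > 127 ? 127 : qOut); // saturate to int8
            std::printf("%d -> %d\n", qIn, qOut); // -20->-30, 0->10, 7->24, 50->110
        }
    }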
diff --git a/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
index 047e0a1f42..b7f14bf5b7 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
@@ -48,6 +48,33 @@ inline DType ArmNNToDType(const DataType& type)
     }
 }
 
+// Function to return ArmNN datatype from input Tosa datatype.
+inline DataType DtypeToArmNN(const DType type)
+{
+    switch (type)
+    {
+        case DType_FP16:
+            return DataType::Float16;
+        case DType_BF16:
+            return DataType::BFloat16;
+        case DType_FP32:
+            return DataType::Float32;
+        case DType_UINT8:
+            return DataType::QAsymmU8;
+        case DType_INT8:
+            return DataType::QSymmS8;
+        case DType_INT16:
+            return DataType::QSymmS16;
+        case DType_INT32:
+            return DataType::Signed32;
+        case DType_BOOL:
+            return DataType::Boolean;
+        default:
+            throw armnn::Exception("DtypeToArmNN: Unsupported tosa::DType in ArmNN.");
+            return DataType::Boolean;
+    }
+}
+
 // Function to return Tosa tensor shape from input ArmNN tensor shape.
 inline std::vector<int32_t> GetTosaTensorShape(const TensorShape& shape)
 {
diff --git a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
index 1a4dd7aac3..56337cfdf4 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
@@ -11,12 +11,13 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
                                       const std::string& outputName,
                                       DType output_type,
                                       const std::vector<int32_t>& shape,
-                                      int32_t scale_multiplier,
-                                      int32_t scale_shift,
+                                      const std::vector<int32_t>& multipliers,
+                                      const std::vector<int32_t>& shifts,
                                       int32_t input_zp,
                                       int32_t output_zp,
                                       bool double_round,
                                       bool scale32,
+                                      bool per_channel,
                                       TosaSerializationOperator** op,
                                       TosaSerializationTensor** tensor)
 {
@@ -25,15 +26,13 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
         throw armnn::Exception("CreateRescaleTosaOperator: nullptr op");
     }
 
-    std::vector<int32_t> multipliers{scale_multiplier};
-    std::vector<int32_t> shifts{scale_shift};
     TosaRescaleAttribute attribute(input_zp,
                                    output_zp,
                                    multipliers,
                                    shifts,
                                    scale32,
                                    double_round,
-                                   false,  // per_channel
+                                   per_channel,
                                    false,  // input_unsigned
                                    false); // output_unsigned
 
@@ -58,75 +57,191 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
                                       const std::string& outputName,
                                       DType output_type,
                                       const std::vector<int32_t>& shape,
-                                      double scale,
+                                      int32_t scale_multiplier,
+                                      int32_t scale_shift,
                                       int32_t input_zp,
                                       int32_t output_zp,
                                       bool double_round,
                                       bool scale32,
+                                      bool per_channel,
                                       TosaSerializationOperator** op,
                                       TosaSerializationTensor** tensor)
 {
-    // The code that follows is based on the behaviour specified in
-    // https://www.mlplatform.org/tosa/tosa_spec.html#_precision_scaling
+    const std::vector<int32_t> multipliers{scale_multiplier};
+    const std::vector<int32_t> shifts{scale_shift};
+    CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multipliers, shifts,
+                              input_zp, output_zp, double_round, scale32, per_channel, op, tensor);
+}
+
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values where
+/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
+/// multiplier = mantissa*2^shift for 32-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale32(double scale,
+                                                 int32_t &multiplier,
+                                                 int32_t &shift)
+{
+    const double mantissa = std::frexp(scale, &shift);
+    auto shiftedM = std::round(mantissa * (int64_t(1) << 31));
+
+    // Can't be greater than 1.0.
+    if (!(shiftedM <= (int64_t(1) << 31)))
+    {
+        throw armnn::Exception("Shifted mantissa exceeds 32 signed bits");
+    }
 
-    auto GetScaleParams = [](double scale, double& m, int32_t& n)
+    if (shiftedM == (int64_t(1) << 31))
     {
-        m = 0;
-        n = 0;
+        shiftedM /= 2;
+        shift++;
+    }
 
-        double lastErr = 1e06;
+    // TOSA expects right shift to be positive, and embed (1 << 31) into right
+    // shift bits.
+    shift = (-shift) + 31;
 
-        const int32_t numExponents = 62;
-        const double start = 1.0;
-        const double end = 2.0;
+    if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+    {
+        throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+    }
 
-        // Slow iterative approach but running in Reference only
-        for (int32_t i = 0; i < numExponents; ++i)
-        {
-            double exp = 1.0 / (1 << i);
-            double currentM = scale / exp; // Find current m given value = currentM * exp
-            if ((currentM >= start) && (currentM < end))
-            {
-                double value = currentM * exp;
-                double err = std::abs(scale - value);
-                if (err < lastErr)
-                {
-                    // Take the m, n that minimize the error
-                    n = i;
-                    m = currentM;
-                    lastErr = err;
-                }
-            }
-        }
-    };
+    multiplier = static_cast<int32_t>(shiftedM);
 
-    auto GetMultiplierShiftByScale = [GetScaleParams](bool scale32, double scale, int32_t& multiplier, int32_t& shift)
+    // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+    // The limit of 62 on shift allows the shift to be decomposed as
+    // two right shifts of 31.
+    if (shift > 62)
     {
-        double m = 0;
-        int32_t n = 0;
+        // Shifting the multiplier by more than 32-bits is unnecessary.
+        multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+        shift = 62;
+    }
+}
 
-        GetScaleParams(scale, m, n);
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values where
+/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
+/// multiplier = mantissa*2^shift for 16-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale16(double scale,
+                                                 int32_t &multiplier,
+                                                 int32_t &shift)
+{
+    const double mantissa = std::frexp(scale, &shift);
+    auto shiftedM = std::round(mantissa * (int64_t(1) << 15));
 
-        multiplier = (scale32) ? (1 << 30) * static_cast<int32_t>(m) : (1 << 14) * static_cast<int32_t>(m);
-        shift = (scale32) ? (30 + n) : (14 + n);
-    };
+    // Can't be greater than 1.0.
+    if (!(shiftedM <= (int64_t(1) << 15)))
+    {
+        throw armnn::Exception("Shifted mantissa exceeds 16 signed bits");
+    }
+
+    if (shiftedM == (int64_t(1) << 15))
+    {
+        shiftedM /= 2;
+        shift++;
+    }
+
+    // TOSA expects right shift to be positive and embed (1 << 15) into right
+    // shift bits.
+    shift = (-shift) + 15;
+
+    if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+    {
+        throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+    }
+
+    multiplier = static_cast<int32_t>(shiftedM);
+
+    // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+    // The limit of 62 on shift allows the shift to be decomposed as
+    // two right shifts of 31.
+    if (shift > 62)
+    {
+        // Shifting the multiplier by more than 31-bits is unnecessary.
+        multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+        shift = 62;
+    }
+}
+
+inline void CreateRescaleTosaOperator(const std::string& inputName,
+                                      const std::string& outputName,
+                                      DType output_type,
+                                      const std::vector<int32_t>& shape,
+                                      double scale,
+                                      int32_t input_zp,
+                                      int32_t output_zp,
+                                      bool double_round,
+                                      bool scale32,
+                                      TosaSerializationOperator** op,
+                                      TosaSerializationTensor** tensor)
+{
     int32_t multiplier;
     int32_t shift;
-    GetMultiplierShiftByScale(scale32, scale, multiplier, shift);
+
+    if (scale32)
+    {
+        ComputeMultiplierAndShiftTosaScale32(scale, multiplier, shift);
+    }
+    else
+    {
+        ComputeMultiplierAndShiftTosaScale16(scale, multiplier, shift);
+    }
+
     CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multiplier, shift,
-                              input_zp, output_zp, double_round, scale32, op, tensor);
+                              input_zp, output_zp, double_round, scale32, false, op, tensor);
 }
 
+inline void CreateRescaleTosaOperatorPerChannel(const std::string& inputName,
+                                                const std::string& outputName,
+                                                DType output_type,
+                                                const std::vector<int32_t>& shape,
+                                                int32_t input_zp,
+                                                int32_t output_zp,
+                                                bool double_round,
+                                                bool scale32,
+                                                double input_scale,
+                                                double output_scale,
+                                                const std::vector<float>& weight_scales,
+                                                TosaSerializationOperator** op,
+                                                TosaSerializationTensor** tensor)
+{
+    std::vector<int32_t> op_tensor_multipliers;
+    std::vector<int32_t> op_tensor_shifts;
+    op_tensor_multipliers.reserve(weight_scales.size());
+    op_tensor_shifts.reserve(weight_scales.size());
+
+    for (const float& weight_scale : weight_scales)
+    {
+        double op_tensor_scale = (input_scale * weight_scale) / output_scale;
+        int32_t multiplier;
+        int32_t shift;
+
+        if (scale32)
+        {
+            ComputeMultiplierAndShiftTosaScale32(op_tensor_scale, multiplier, shift);
+        }
+        else
+        {
+            ComputeMultiplierAndShiftTosaScale16(op_tensor_scale, multiplier, shift);
+        }
+
+        op_tensor_multipliers.push_back(multiplier);
+        op_tensor_shifts.push_back(shift);
+    }
+
+    CreateRescaleTosaOperator(inputName, outputName, output_type, shape, op_tensor_multipliers, op_tensor_shifts,
+                              input_zp, output_zp, double_round, scale32, true, op, tensor);
+}
+
-inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
-                                               const std::string& outputName,
-                                               DType output_type,
-                                               const std::vector<int32_t>& shape,
-                                               double output_scale,
-                                               int32_t output_zp,
-                                               TosaSerializationOperator** op,
-                                               TosaSerializationTensor** tensor)
-{
-    CreateRescaleTosaOperator(inputName, outputName, output_type, shape,
-                              output_scale, 0, output_zp, true, true, op, tensor);
+inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
+                                               const std::string& outputName,
+                                               DType output_type,
+                                               const std::vector<int32_t>& shape,
+                                               double output_scale,
+                                               int32_t output_zp,
+                                               TosaSerializationOperator** op,
+                                               TosaSerializationTensor** tensor)
+{
+    CreateRescaleTosaOperator(inputName, outputName, output_type, shape, output_scale,
+                              0, output_zp, true, true, op, tensor);
 }
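A worked example of the multiplier/shift decomposition above, not part of the patch: for scale = 0.75, std::frexp returns mantissa 0.75 with exponent 0, so the 32-bit path produces multiplier = round(0.75 * 2^31) = 1610612736 and shift = 31, and multiplier * 2^-31 reproduces 0.75 exactly:

    // Mirrors ComputeMultiplierAndShiftTosaScale32 for a single scale value.
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const double scale = 0.75;
        int exponent = 0;
        const double mantissa = std::frexp(scale, &exponent); // 0.75, exponent 0
        const auto multiplier =
            static_cast<int32_t>(std::round(mantissa * (int64_t(1) << 31))); // 1610612736
        const int shift = (-exponent) + 31; // 31

        std::printf("multiplier=%d shift=%d value=%.6f\n",
                    multiplier, shift, multiplier * std::pow(2.0, -shift)); // value=0.750000
    }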
TEST_CASE("TosaRefMaximumEndtoEndTestInt8") { -- cgit v1.2.1