8 files changed, 188 insertions, 17 deletions
diff --git a/delegate/classic/src/BroadcastTo.hpp b/delegate/classic/src/BroadcastTo.hpp
index 92aed79982..2e2b3ab155 100644
--- a/delegate/classic/src/BroadcastTo.hpp
+++ b/delegate/classic/src/BroadcastTo.hpp
@@ -1,11 +1,12 @@
 //
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
 #pragma once
 
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <DelegateUtils.hpp>
 
 #include <tensorflow/lite/builtin_ops.h>
 #include <tensorflow/lite/c/builtin_op_data.h>
@@ -83,6 +84,15 @@ namespace armnnDelegate
         const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
         const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
 
+        if (ZeroDimPresent({inputTensorInfo, outputTensorInfo}))
+        {
+            TF_LITE_MAYBE_KERNEL_LOG(
+                tfLiteContext,
+                "TfLiteArmnnDelegate: Zero dimension tensors are not supported in operator #%d node #%d: ",
+                broadcastToOperatorCode, nodeIndex);
+            return kTfLiteError;
+        }
+
         auto* shapeData = tflite::GetTensorData<int32_t>(&tfLiteShapeTensor);
         auto shapeTensorNum = tfLiteShapeTensor.dims->data[0];
 
diff --git a/delegate/common/src/DelegateUtils.hpp b/delegate/common/src/DelegateUtils.hpp
index 96767ff78c..245fc9be90 100644
--- a/delegate/common/src/DelegateUtils.hpp
+++ b/delegate/common/src/DelegateUtils.hpp
@@ -300,4 +300,26 @@ armnn::TensorInfo OutputShapeOfSqueeze(std::vector<uint32_t> squeezeDims,
     return outTensorInfo;
 }
 
+/// Checks whether any of the given tensors has a dimension of size zero.
+///
+/// @param tensorInfoList  Tensor infos to inspect (typically an operator's inputs and outputs).
+/// @return true if at least one dimension of at least one tensor is 0, false otherwise
+///         (including for an empty list).
+bool ZeroDimPresent(std::initializer_list<armnn::TensorInfo> tensorInfoList)
+{
+    // Iterate by const reference: copying a TensorInfo per element is unnecessary work.
+    for (const armnn::TensorInfo& tensorInfo : tensorInfoList)
+    {
+        const armnn::TensorShape& shape = tensorInfo.GetShape();
+        for (unsigned int i = 0; i < tensorInfo.GetNumDimensions(); ++i)
+        {
+            if (shape[i] == 0)
+            {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 } // namespace anonymous
diff --git a/delegate/common/src/test/DelegateUtilsTest.cpp b/delegate/common/src/test/DelegateUtilsTest.cpp
new file mode 100644
index 0000000000..5ce470e289
--- /dev/null
+++ b/delegate/common/src/test/DelegateUtilsTest.cpp
@@ -0,0 +1,57 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/Tensor.hpp>
+#include <doctest/doctest.h>
+#include <common/src/DelegateUtils.hpp>
+
+namespace armnn
+{
+
+// Unit tests for ZeroDimPresent().
+TEST_SUITE("DelegateUtils_Tests")
+{
+    // A lone tensor with a zero-sized dimension is reported.
+    TEST_CASE("Zero_Dim_In_Input_Test_True")
+    {
+        const unsigned int inputDims[] = {0, 1, 2, 3};
+        const TensorInfo inputInfo(4, inputDims, DataType::Float32);
+
+        CHECK(ZeroDimPresent({inputInfo}));
+    }
+
+    // A lone tensor whose dimensions are all non-zero is not reported.
+    TEST_CASE("Zero_Dim_In_Input_Test_False")
+    {
+        const unsigned int inputDims[] = {1, 2, 3, 4};
+        const TensorInfo inputInfo(4, inputDims, DataType::Float32);
+
+        CHECK_FALSE(ZeroDimPresent({inputInfo}));
+    }
+
+    // A zero-sized dimension in the second tensor of the list is still reported.
+    TEST_CASE("Zero_Dim_In_Output_Test_True")
+    {
+        const unsigned int inputDims[] = {1, 2, 3, 4};
+        const unsigned int outputDims[] = {0, 1, 2, 3};
+        const TensorInfo inputInfo(4, inputDims, DataType::Float32);
+        const TensorInfo outputInfo(4, outputDims, DataType::Float32);
+
+        CHECK(ZeroDimPresent({inputInfo, outputInfo}));
+    }
+
+    // Two tensors without any zero-sized dimension are not reported.
+    TEST_CASE("Zero_Dim_In_Output_Test_False")
+    {
+        const unsigned int inputDims[] = {1, 2, 3, 4};
+        const unsigned int outputDims[] = {1, 2, 3, 4};
+        const TensorInfo inputInfo(4, inputDims, DataType::Float32);
+        const TensorInfo outputInfo(4, outputDims, DataType::Float32);
+
+        CHECK_FALSE(ZeroDimPresent({inputInfo, outputInfo}));
+    }
+}
+
+}    // namespace armnn
\ No newline at end of file
diff --git a/delegate/opaque/src/BroadcastTo.hpp b/delegate/opaque/src/BroadcastTo.hpp
index 379587546f..8fcea9393c 100644
--- a/delegate/opaque/src/BroadcastTo.hpp
+++ b/delegate/opaque/src/BroadcastTo.hpp
@@ -1,11 +1,12 @@
 //
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
 #pragma once
 
 #include <OpaqueDelegateUtils.hpp>
+#include <DelegateUtils.hpp>
 
 namespace armnnOpaqueDelegate
 {
@@ -102,6 +103,15 @@ namespace armnnOpaqueDelegate
         const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tfLiteOutputTensor,
                                                                                        true);
 
+        if (ZeroDimPresent({inputTensorInfo, outputTensorInfo}))
+        {
+            TF_LITE_OPAQUE_MAYBE_KERNEL_LOG(
+                tfLiteContext,
+                "TfLiteArmnnOpaqueDelegate: Zero dimension tensors are not supported in operator #%d node #%d: ",
+                broadcastToOperatorCode, nodeIndex);
+            return kTfLiteError;
+        }
+
         auto* shapeData = static_cast<int32_t*>(TfLiteOpaqueTensorData(tfLiteShapeTensor));
         int32_t shapeTensorNum = TfLiteOpaqueTensorDim(tfLiteShapeTensor, 0);
 
diff --git a/src/backends/reference/workloads/Broadcast.cpp b/src/backends/reference/workloads/Broadcast.cpp
index 24af0fc4b1..f17ec6b311 100644
--- a/src/backends/reference/workloads/Broadcast.cpp
+++ b/src/backends/reference/workloads/Broadcast.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2019 Arm Ltd. All rights reserved.
+// Copyright © 2019,2024 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -38,13 +38,29 @@ BroadcastLoop::BroadcastLoop(const TensorShape& inShape, const TensorShape& outS
     unsigned int sIn = 1;
     unsigned int sOut = 1;
 
+    // Number of leading output dimensions that have no counterpart in the input; the input is
+    // treated as if that many size-1 dimensions were prepended to its shape.
+    // NOTE(review): assumes the input has no more dimensions than numDims, otherwise this
+    // unsigned subtraction wraps around - confirm against callers.
+    const unsigned int dimDifference = numDims - inShape.GetNumDimensions();
+
     for (unsigned int j = numDims - 1, k = 0; k < numDims ; k++, j--)
     {
         m_DimData[j].m_DimSize = outShape[j];
-        m_DimData[j].m_Stride1 = (inShape[j] > 1) ? sIn : 0;
+        if (j < dimDifference)
+        {
+            // Implicit prepended dimension of size 1: stride 0, and sIn stays as it is.
+            m_DimData[j].m_Stride1 = 0;
+        }
+        else
+        {
+            // Output dimension j lines up with input dimension (j - dimDifference).
+            const unsigned int inDimSize = inShape[j - dimDifference];
+            m_DimData[j].m_Stride1 = (inDimSize > 1) ? sIn : 0;
+            sIn *= inDimSize;
+        }
         m_DimData[j].m_StrideOut = sOut;
 
-        sIn *= inShape[j];
         sOut *= outShape[j];
     }
 }
diff --git a/src/backends/tosaCommon/TosaMappings.cpp b/src/backends/tosaCommon/TosaMappings.cpp
index 8608776471..bc1376b9cc 100644
--- a/src/backends/tosaCommon/TosaMappings.cpp
+++ b/src/backends/tosaCommon/TosaMappings.cpp
@@ -30,7 +30,8 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer,
             {
                 return ConvertLeakyReluToTosaOperator(layer, inputs, outputs, activationDesc);
             }
-            if (activationDesc->m_Function == ActivationFunction::ReLu)
+            if (activationDesc->m_Function == ActivationFunction::ReLu ||
+                activationDesc->m_Function == ActivationFunction::BoundedReLu)
             {
                 return ConvertReluToTosaOperator(layer, inputs, outputs, activationDesc);
             }
diff --git a/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp b/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp
index bd1a59670e..541b39cd8d 100644
--- a/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp
@@ -17,7 +17,7 @@
 TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer,
                                                        const std::vector<const TensorInfo*>& inputs,
                                                        const std::vector<const TensorInfo*>& outputs,
-                                                       const ActivationDescriptor*)
+                                                       const ActivationDescriptor* desc)
 {
     if (inputs.size() != 1)
     {
@@ -31,7 +31,36 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer,
 
     std::string inputName  = std::string("input_");
     std::string outputName = std::string("output0_");
-    std::string blockName  = std::string("Op_RELU_block_") + GetUniqueTosaMappingID();
+    std::string blockName  = "";
+
+    int32_t clamp_min = 0;
+    int32_t clamp_max = 0;
+    float float_max = 0.0f;
+    switch (desc->m_Function)
+    {
+        case ActivationFunction::ReLu:
+        {
+            clamp_max = std::numeric_limits<int32_t>::max();
+            float_max = std::numeric_limits<float>::max();
+            blockName = std::string("Op_RELU_block_") + GetUniqueTosaMappingID();
+            break;
+        }
+        case ActivationFunction::BoundedReLu:
+        {
+            clamp_max = static_cast<int32_t>(desc->m_A);
+            float_max = desc->m_A;
+            blockName = std::string("Op_BOUNDED_RELU_block_") + GetUniqueTosaMappingID();
+            break;
+        }
+        case ActivationFunction::LeakyReLu:
+        {
+            throw Exception("LeakyRelu TOSA mappings are performed in ConvertLeakyReluToTosaOperator().");
+        }
+        default:
+        {
+            throw Exception("Activation function is not supported in ConvertReluToTosaOperator().");
+        }
+    }
 
     // If a layer is present then the block will be used for execution, so input and output names need to be determined
     // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
@@ -60,8 +89,6 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer,
     DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
     tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
 
-    int32_t clamp_min = 0;
-    int32_t clamp_max = std::numeric_limits<int32_t>::max();
     std::string clampInputNameStr = inputName;
     if (inputDType0 == tosa::DType::DType_INT8 || inputDType0 == tosa::DType::DType_INT16)
     {
@@ -72,18 +99,26 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer,
         int32_t input_zp = inputs[0]->GetQuantizationOffset();
         int32_t output_zp = outputs[0]->GetQuantizationOffset();
 
-        clamp_min = outputs[0]->GetQuantizationOffset();
+        clamp_min = output_zp;
+
+        if (desc->m_Function == ActivationFunction::BoundedReLu)
+        {
+            clamp_max = static_cast<int32_t>(std::round(desc->m_A / outputs[0]->GetQuantizationScale())) + output_zp;
+        }
+
         if (inputDType0 == tosa::DType::DType_INT8)
         {
             clamp_min =
                 clamp_min < std::numeric_limits<int8_t>::min() ? std::numeric_limits<int8_t>::min() : clamp_min;
-            clamp_max = std::numeric_limits<int8_t>::max();
+            clamp_max =
+                clamp_max > std::numeric_limits<int8_t>::max() ? std::numeric_limits<int8_t>::max() : clamp_max;
         }
         else
         {
             clamp_min =
                 clamp_min < std::numeric_limits<int16_t>::min() ? std::numeric_limits<int16_t>::min() : clamp_min;
-            clamp_max = std::numeric_limits<int16_t>::max();
+            clamp_max =
+                clamp_max > std::numeric_limits<int16_t>::max() ? std::numeric_limits<int16_t>::max() : clamp_max;
         }
 
         TosaSerializationOperator* rescaleOp = nullptr;
@@ -101,8 +136,8 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer,
                                                       inputDType0,
                                                       {}));
     }
 
-    TosaClampAttribute attribute(clamp_min, clamp_max, 0, std::numeric_limits<float>::max());
+    TosaClampAttribute attribute(clamp_min, clamp_max, 0, float_max);
     auto* clamp_op = new TosaSerializationOperator(Op_CLAMP,
                                                    Attribute_ClampAttribute,
                                                    &attribute,
diff --git a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
index 09a3d44c02..22fd782a1a 100644
--- a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
@@ -30,25 +30,28 @@ TEST_SUITE("TosaRefEndToEnd")
 static std::vector<BackendId> tosaDefaultBackends = { "TosaRef" };
 
 // Activation
-//LeakyRelu
+// LeakyRelu
 TEST_CASE("TosaRefLeakyReluActivationFloat32")
 {
     ActivationEndToEndTest<DataType::Float32>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 1.f, 0, 0.01f);
 }
+
 TEST_CASE("TosaRefLeakyReluActivationFloat16")
 {
     ActivationEndToEndTest<DataType::Float16>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 0.3f, 5, 0.01f);
 }
+
 TEST_CASE("TosaRefLeakyReluActivationInt8")
 {
     ActivationEndToEndTest<DataType::QAsymmS8>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 0.6f, 7, 0.01f);
 }
+
 TEST_CASE("TosaRefLeakyReluActivationInt16")
 {
     ActivationEndToEndTest<DataType::QSymmS16>(tosaDefaultBackends, ActivationFunction::LeakyReLu, 0.15f, 0, 0.01f);
 }
 
-//Relu
+// Relu
 TEST_CASE("TosaRefReLuEndToEndTestQAsymmS8")
 {
     ActivationEndToEndTest<armnn::DataType::QAsymmS8>(tosaDefaultBackends, ActivationFunction::ReLu);
@@ -69,6 +72,34 @@ TEST_CASE("TosaRefReLuEndToEndTestQSymmS16")
     ActivationEndToEndTest<armnn::DataType::QSymmS16>(tosaDefaultBackends, ActivationFunction::ReLu);
 }
 
+// BoundedRelu
+// Every case below passes the same trailing arguments (1.0f, 0, 6.0f, 0.0f); only the data type varies.
+// NOTE(review): these presumably map to qScale, qOffset, a (upper bound) and b - confirm against
+// ActivationEndToEndTest.
+TEST_CASE("TosaRefBoundedReLuEndToEndTestFloat32")
+{
+    ActivationEndToEndTest<armnn::DataType::Float32>(tosaDefaultBackends,
+        ActivationFunction::BoundedReLu, 1.0f, 0, 6.0f, 0.0f);
+}
+
+TEST_CASE("TosaRefBoundedReLuEndToEndTestFloat16")
+{
+    ActivationEndToEndTest<armnn::DataType::Float16>(tosaDefaultBackends,
+        ActivationFunction::BoundedReLu, 1.0f, 0, 6.0f, 0.0f);
+}
+
+TEST_CASE("TosaRefBoundedReLuEndToEndTestQAsymmS8")
+{
+    ActivationEndToEndTest<armnn::DataType::QAsymmS8>(tosaDefaultBackends,
+        ActivationFunction::BoundedReLu, 1.0f, 0, 6.0f, 0.0f);
+}
+
+TEST_CASE("TosaRefBoundedReLuEndToEndTestQSymmS16")
+{
+    ActivationEndToEndTest<armnn::DataType::QSymmS16>(tosaDefaultBackends,
+        ActivationFunction::BoundedReLu, 1.0f, 0, 6.0f, 0.0f);
+}
+
 // Addition
 TEST_CASE("TosaRefAdditionEndtoEndTestFloat32")
 {