From ae1c9fee8bb6073abf2a419af3266f23dbab82a9 Mon Sep 17 00:00:00 2001
From: Suhail Munshi
Date: Wed, 14 Apr 2021 12:16:49 +0100
Subject: Added S32 Integer support to DIV operator in CLElementWiseOperations with Tests

Partially Resolves: COMPMID-3793

Signed-off-by: Suhail Munshi
Change-Id: I14d6884c34f33a6caee11fc1230f9d2d3ae6c4c1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5425
Reviewed-by: Giorgio Arena
Reviewed-by: Manuel Bottini
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
---
 src/core/CL/cl_kernels/elementwise_operation.cl |  9 ++++--
 src/core/gpu/cl/kernels/ClElementwiseKernel.cpp | 35 ++++++++++++++++++++--
 tests/validation/CL/ArithmeticDivision.cpp      | 30 ++++++++++++++++---
 .../fixtures/ElementwiseOperationsFixture.h     | 13 ++++++++
 4 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/src/core/CL/cl_kernels/elementwise_operation.cl b/src/core/CL/cl_kernels/elementwise_operation.cl
index 11e0612205..a5b486a882 100644
--- a/src/core/CL/cl_kernels/elementwise_operation.cl
+++ b/src/core/CL/cl_kernels/elementwise_operation.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,10 +38,15 @@
 #define MAX(x, y) max(x, y)
 #define MIN(x, y) min(x, y)
 #define SQUARED_DIFF(x, y) (x - y) * (x - y)
-#define DIV(x, y) (x / y)
 #define POWER(x, y) pow(x, y)
 #define PRELU(x, y) (select(y * x, x, CONVERT((x > (DATA_TYPE_OUT)0), SELECT_VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT))))
 
+#if defined(S32)
+#define DIV(x, y) CONVERT(floor(CONVERT(x, VEC_DATA_TYPE(float, VEC_SIZE_OUT)) / CONVERT(y, VEC_DATA_TYPE(float, VEC_SIZE_OUT))), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT));
+#else /* S32 */
+#define DIV(x, y) (x / y)
+#endif /* S32 */
+
 #define AND(x, y) (CONVERT((x && y), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)) & ((VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT))1))
 #define OR(x, y) (CONVERT((x || y), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)) & ((VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT))1))
 
diff --git a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp
index 8f12eb2215..335ee9c392 100644
--- a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp
@@ -98,6 +98,29 @@ Status validate_arguments_with_float_only_supported_rules(const ITensorInfo &src
     return Status{};
 }
 
+Status validate_arguments_divide_operation(const ITensorInfo* src1, const ITensorInfo* src2, const ITensorInfo* dst)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src1, 1, DataType::F16, DataType::F32, DataType::S32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2);
+
+    const TensorShape out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape());
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
+
+    // Validate in case of configured dst
+    if(dst->total_size() > 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::F16, DataType::F32, DataType::S32);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, dst);
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0),
+                                        "Wrong shape for dst");
+    }
+
+    return Status{};
+}
+
 Status validate_arguments_with_arithmetic_rules(const ITensorInfo &src1, const ITensorInfo &src2, const ITensorInfo &dst)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&src1);
@@ -180,6 +203,8 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &s
         build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2info.scale));
         build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
     }
+    build_opts.add_option_if(src1.data_type() == DataType::S32, "-DS32");
+
     return build_opts;
 }
 
@@ -459,9 +484,15 @@ void ClArithmeticKernel::configure(const ClCompileContext &compile_context, Arit
 Status ClArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst);
-    if(op == ArithmeticOperation::DIV || op == ArithmeticOperation::POWER)
+    if(op == ArithmeticOperation::DIV)
     {
-        // Division and Power operators don't support integer arithmetic
+        // Partial integer support S32/F32/F16
+        ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_divide_operation(src1, src2, dst));
+        ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_division(*src1->clone(), *src2->clone(), *dst->clone()).first);
+    }
+    else if(op == ArithmeticOperation::POWER)
+    {
+        // The Power operator doesn't support integer arithmetic
         ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_float_only_supported_rules(*src1, *src2, *dst));
         ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_division(*src1->clone(), *src2->clone(), *dst->clone()).first);
     }
diff --git a/tests/validation/CL/ArithmeticDivision.cpp b/tests/validation/CL/ArithmeticDivision.cpp
index 36567dc02a..9dcdfb83e1 100644
--- a/tests/validation/CL/ArithmeticDivision.cpp
+++ b/tests/validation/CL/ArithmeticDivision.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,9 +51,10 @@ const auto ArithmeticDivisionFP16Dataset = combine(combine(framework::dataset::m
                                                    framework::dataset::make("DataType", DataType::F16));
 const auto ArithmeticDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
                                                    framework::dataset::make("DataType", DataType::F32));
-const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
-{ ActivationLayerInfo() });
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ArithmeticDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S32)),
+                                                  framework::dataset::make("DataType", DataType::S32));
+const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo() });
+const auto ActivationFunctionsDataset      = framework::dataset::make("ActivationInfo",
 {
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
@@ -89,6 +90,27 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 // clang-format on
 // *INDENT-ON*
 
+using CLArithmeticDivisionIntegerFixture = ArithmeticDivisionValidationIntegerFixture<CLTensor, CLAccessor, CLArithmeticDivision, int32_t>;
+
+TEST_SUITE(Integer)
+TEST_SUITE(S32)
+
+FIXTURE_DATA_TEST_CASE(RunSmallInteger, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
+                                                                                                                       EmptyActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunIntegerWithActivation, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
+                                                                                                                          ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 template <typename T>
 using CLArithmeticDivisionFloatFixture = ArithmeticDivisionValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>;
 
diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h
index bf51c7e69b..11ca510842 100644
--- a/tests/validation/fixtures/ElementwiseOperationsFixture.h
+++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h
@@ -296,6 +296,19 @@ public:
     }
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ArithmeticDivisionValidationIntegerFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    {
+        ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
+                                                                                                     data_type0, data_type1, output_data_type,
+                                                                                                     QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+    }
+};
+
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
 class ArithmeticDivisionValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
--
cgit v1.2.1
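
A note on the semantics the new S32 DIV macro selects: the quotient is computed in float and then floored, so negative results round toward negative infinity instead of truncating toward zero the way plain C/OpenCL integer division does. The host-side scalar sketch below is illustrative only and not part of the patch; s32_div_reference is a made-up name that simply mirrors the kernel's CONVERT(floor(float(x) / float(y)), int) expression.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Host-side scalar mirror of the kernel's S32 DIV path:
    // convert to float, divide, floor, convert back to int32.
    static int32_t s32_div_reference(int32_t x, int32_t y)
    {
        return static_cast<int32_t>(std::floor(static_cast<float>(x) / static_cast<float>(y)));
    }

    int main()
    {
        std::printf("%d\n", s32_div_reference(7, 2));  // 3
        std::printf("%d\n", s32_div_reference(-7, 2)); // -4 (C-style truncation would give -3)
        return 0;
    }

Going through float keeps the kernel a single vectorized floor() over the whole VEC_SIZE, with the usual caveat that float cannot represent every 32-bit integer exactly once magnitudes exceed 2^24.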