From 1b14c75c0d591c4abe4d2d41b7e4e165fbf58382 Mon Sep 17 00:00:00 2001
From: Sheri Zhang
Date: Mon, 9 Mar 2020 14:29:52 +0000
Subject: COMPMID-2968: Add support for QASYMM8_SIGNED in
 CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel

Signed-off-by: Sheri Zhang
Change-Id: I37e6e76dbd5546c0eaedfacd01ea905c37148e8a
Signed-off-by: Sheri Zhang
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2861
Comments-Addressed: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Tested-by: Arm Jenkins
---
 tests/validation/CL/GEMMLowp.cpp            |  40 +++++++++++
 tests/validation/fixtures/GEMMLowpFixture.h | 103 ++++++++++++++++++++++++++++
 tests/validation/reference/GEMMLowp.cpp     |  63 +++++++++++++++++
 tests/validation/reference/GEMMLowp.h       |   8 +++
 4 files changed, 214 insertions(+)

diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index 3d7c76aa2b..8aa81d0962 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -389,6 +389,46 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedP
 TEST_SUITE_END() // MultGreater1
 TEST_SUITE_END() // BoundedReLu
 TEST_SUITE_END() // QuantizeDownInt32ToInt16ScaleByFixedPoint
+
+TEST_SUITE(QuantizeDownInt32ScaleByFloat)
+
+TEST_SUITE(QASYMM8)
+using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture =
+    GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture;
+
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8),
+                                                                       datasets::TinyShapes()),
+                                                               framework::dataset::make("result_real_multiplier", 0.33f)),
+                                                       framework::dataset::make("result_offset", 2, 3)),
+                                               framework::dataset::make("min", 0)),
+                                       framework::dataset::make("max", 255)),
+                               framework::dataset::make("addBias", { false, true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed =
+    GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture;
+FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
+                                                                       datasets::TinyShapes()),
+                                                               framework::dataset::make("result_real_multiplier", 0.33f)),
+                                                       framework::dataset::make("result_offset", 2, 3)),
+                                               framework::dataset::make("min", -128)),
+                                       framework::dataset::make("max", 127)),
+                               framework::dataset::make("addBias", { false, true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // QuantizeDownInt32ScaleByFloat
+
 TEST_SUITE_END() // OutputStage
 TEST_SUITE_END() // GEMMLowp
 TEST_SUITE_END() // CL
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index 0207f4c5ae..be9ce96dcb 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -556,6 +556,109 @@ protected:
     SimpleTensor _reference{};
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(DataType data_type, TensorShape shape, float result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
+    {
+        _target    = compute_target(data_type, shape, result_real_multiplier, result_offset, min, max, add_bias);
+        _reference = compute_reference(shape, result_real_multiplier, result_offset, min, max, add_bias);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        // To avoid data all being clamped
+        std::uniform_int_distribution<> distribution(-500, 500);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(DataType data_type, const TensorShape &shape, float result_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
+    {
+        TensorShape shape_bias(shape[0]);
+
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
+        TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
+        TensorType c = create_tensor<TensorType>(shape, data_type, 1);
+
+        // create output stage info
+        GEMMLowpOutputStageInfo info;
+        info.gemmlowp_max_bound       = max;
+        info.gemmlowp_min_bound       = min;
+        info.gemmlowp_real_multiplier = result_multiplier;
+        info.gemmlowp_offset          = result_offset;
+        info.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT;
+        info.output_data_type         = data_type;
+
+        // Create and configure function
+        FunctionType output_stage;
+        output_stage.configure(&a, add_bias ? &b : nullptr, &c, info);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        c.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor
+        fill(AccessorType(a), 0);
+
+        if(add_bias)
+        {
+            ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Allocate bias tensor
+            b.allocator()->allocate();
+
+            ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Fill tensor
+            fill(AccessorType(b), 1);
+        }
+
+        // Compute GEMM function
+        output_stage.run();
+        return c;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, float_t result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
+    {
+        // Create reference
+        TensorShape shape_bias(shape[0]);
+
+        SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
+        SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
+
+        // Fill reference
+        fill(a, 0);
+
+        const std::vector<float_t> result_float_multiplier_vec = { result_real_multiplier };
+
+        if(add_bias)
+        {
+            // Fill bias
+            fill(b, 1);
+
+            return reference::gemmlowp_quantize_down_scale_by_float<T>(a, b, result_float_multiplier_vec, result_offset, min, max);
+        }
+        else
+        {
+            return reference::gemmlowp_quantize_down_scale_by_float<T>(a, result_float_multiplier_vec, result_offset, min, max);
+        }
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
 template
 class GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture : public framework::Fixture
 {
diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp
index 99d08e34f1..61617c8aae 100644
--- a/tests/validation/reference/GEMMLowp.cpp
+++ b/tests/validation/reference/GEMMLowp.cpp
@@ -131,6 +131,39 @@ void quantize_down_scale_by_fixedpoint(const SimpleTensor *in, const Simple
                                              std::min(std::numeric_limits::max(), result)));
     }
 }
+
+template <typename TOut, typename TIn>
+void quantize_down_scale_by_float(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<float_t> result_real_multiplier,
+                                  int32_t result_offset, int32_t min, int32_t max)
+{
+    const int  cols_in        = in->shape().x();
+    const bool is_per_channel = result_real_multiplier.size() > 1;
+
+    for(int i = 0; i < in->num_elements(); ++i)
+    {
+        TIn result = (*in)[i];
+
+        if(bias != nullptr)
+        {
+            result += (*bias)[i % cols_in];
+        }
+
+        // Float multiplication
+        const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0];
+
+        float_t result_f = static_cast<float_t>(result) * multiplier + static_cast<float_t>(result_offset);
+        result           = static_cast<TIn>(std::round(result_f));
+
+        // Bounded ReLu
+        if(min != max)
+        {
+            result = std::max(min, std::min(max, result));
+        }
+
+        (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
+                                                    std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
+    }
+}
 } // namespace
 
 template
@@ -237,6 +270,36 @@ SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor
     return dst;
 }
 
+template <typename TOut, typename TIn>
+SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias,
+                                                         std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
+{
+    SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
+
+    quantize_down_scale_by_float<TOut, TIn>(&in, &bias, &dst, result_real_multiplier, result_offset, min, max);
+
+    return dst;
+}
+
+template <typename TOut, typename TIn>
+SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in,
+                                                         std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
+{
+    SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
+
+    quantize_down_scale_by_float<TOut, TIn>(&in, nullptr, &dst, result_real_multiplier, result_offset, min, max);
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
+                                                                     std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
+                                                                     std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
+template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
+                                                                    std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
+template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
+                                                                    std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
 template SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor &a, std::vector result_fixedpoint_multiplier, std::vector result_shift,
                                                                  int32_t result_offset_after_shift, int32_t min, int32_t max);
 template SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor &a, const SimpleTensor &b,
diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h
index 7d711263e8..5de48dab52 100644
--- a/tests/validation/reference/GEMMLowp.h
+++ b/tests/validation/reference/GEMMLowp.h
@@ -59,6 +59,14 @@ SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor
 template
 SimpleTensor gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor &in, const SimpleTensor &bias,
                                                         std::vector result_fixedpoint_multiplier, std::vector result_shift, int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0);
+
+template <typename TOut, typename TIn>
+SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias,
+                                                         std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min = 0, int32_t max = 0);
+
+template <typename TOut, typename TIn>
+SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in,
+                                                         std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min = 0, int32_t max = 0);
 } // namespace reference
 } // namespace validation
 } // namespace test
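For readers who want to see the arithmetic these tests exercise outside of the validation framework, below is a minimal standalone sketch of the QUANTIZE_DOWN_FLOAT requantization step for a QASYMM8_SIGNED output. It mirrors the reference path added above (int32 accumulator, optional per-column bias, real multiplier, output offset, optional bounded ReLU, saturation to int8); the function name, sample values, and the row-major/per-column-bias layout are illustrative only and are not part of the patch.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

// Quantize int32 accumulators down to int8: result = round(acc * multiplier + offset),
// with an optional per-column bias and an optional bounded-ReLU clamp, then saturate.
std::vector<int8_t> quantize_down_scale_by_float_s8(const std::vector<int32_t> &in,
                                                    const std::vector<int32_t> *bias, // one entry per column, may be nullptr
                                                    int                         cols,
                                                    float                       multiplier,
                                                    int32_t                     offset,
                                                    int32_t                     min,
                                                    int32_t                     max)
{
    std::vector<int8_t> out(in.size());
    for(std::size_t i = 0; i < in.size(); ++i)
    {
        int32_t acc = in[i];
        if(bias != nullptr)
        {
            acc += (*bias)[i % cols]; // bias is broadcast along the rows
        }

        // Scale by the real (floating-point) multiplier and add the output offset
        const float scaled = static_cast<float>(acc) * multiplier + static_cast<float>(offset);
        int32_t     result = static_cast<int32_t>(std::round(scaled));

        // Bounded ReLU: only applied when an actual range was requested (min != max)
        if(min != max)
        {
            result = std::max(min, std::min(max, result));
        }

        // Saturate to the QASYMM8_SIGNED (int8) representable range
        result = std::max<int32_t>(std::numeric_limits<int8_t>::lowest(),
                                   std::min<int32_t>(std::numeric_limits<int8_t>::max(), result));
        out[i] = static_cast<int8_t>(result);
    }
    return out;
}

int main()
{
    // Two rows of three columns, with values chosen to hit both ends of the clamp
    const std::vector<int32_t> acc  = { -500, -3, 0, 7, 480, 123 };
    const std::vector<int32_t> bias = { 10, -10, 0 };

    for(int8_t v : quantize_down_scale_by_float_s8(acc, &bias, 3, 0.33f, 2, -128, 127))
    {
        std::cout << static_cast<int>(v) << ' ';
    }
    std::cout << '\n';
    return 0;
}

As in the reference path, passing min == max leaves the bounded ReLU disabled, so only the final int8 saturation limits the output range.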