From b18252dce941001d8980721596709ea01d55747a Mon Sep 17 00:00:00 2001
From: Sheri Zhang
Date: Tue, 7 Apr 2020 11:04:57 +0100
Subject: COMPMID-3239: Implement QSYMM16 LayerNormalizationKernel for CL

Signed-off-by: Sheri Zhang
Change-Id: Ib1577c4a9aa29293a903731b2a4083b0d2243e1e
Signed-off-by: Sheri Zhang
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2994
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Sang-Hoon Park
Reviewed-by: Michele Di Giorgio
---
 tests/validation/CL/QLSTMLayerNormalization.cpp   | 197 +++++++++++++++++++++
 tests/validation/NEON/QLSTMLayerNormalization.cpp |   2 +-
 .../fixtures/QLSTMLayerNormalizationFixture.h     |  34 +++-
 3 files changed, 229 insertions(+), 4 deletions(-)
 create mode 100644 tests/validation/CL/QLSTMLayerNormalization.cpp

diff --git a/tests/validation/CL/QLSTMLayerNormalization.cpp b/tests/validation/CL/QLSTMLayerNormalization.cpp
new file mode 100644
index 0000000000..ea5eca6261
--- /dev/null
+++ b/tests/validation/CL/QLSTMLayerNormalization.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/QLSTMLayerNormalizationFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<int16_t> tolerance_s16(0); /**< Tolerance value for comparing reference's output against implementation's output for QSYMM16 data types */
+constexpr uint32_t vector_size_byte = 16;
+
+using test::datasets::ShapeDataset;
+template <uint32_t num_elements_per_iter, uint32_t num_batches, uint32_t num_iteration>
+class QLSTMLayerNormShapeDataSet : public ShapeDataset
+{
+    static constexpr auto boundary_minus_one = num_elements_per_iter * num_iteration - 1;
+    static constexpr auto boundary           = num_elements_per_iter * num_iteration;
+    static constexpr auto boundary_plus_one  = num_elements_per_iter * num_iteration + 1;
+
+public:
+    QLSTMLayerNormShapeDataSet(std::string name)
+        : ShapeDataset(name,
+    {
+        TensorShape{ boundary_minus_one, num_batches },
+        TensorShape{ boundary, num_batches },
+        TensorShape{ boundary_plus_one, num_batches }
+    })
+    {
+    }
+};
+
+template <uint32_t num_elements_per_iter, uint32_t num_batches>
+class QLSTMLayerNormShapeDataSet<num_elements_per_iter, num_batches, 0> : public ShapeDataset
+{
+public:
+    QLSTMLayerNormShapeDataSet(std::string name)
+        : ShapeDataset(name,
+    {
+        TensorShape{ 1, num_batches },
+        TensorShape{ 2, num_batches }
+    })
+    {
+    }
+};
+} // namespace
+TEST_SUITE(CL)
+TEST_SUITE(QLSTMLayerNormalization)
+
+static const TensorShape correct_input_shape{ TensorShape(15U, 2U) };
+static const TensorShape correct_weight_shape{ TensorShape(15U) };
+static const TensorShape correct_bias_shape{ TensorShape(15U) };
+static const DataType    correct_input_dt{ DataType::QSYMM16 };
+static const DataType    correct_weight_dt{ DataType::QSYMM16 };
+static const DataType    correct_bias_dt{ DataType::S32 };
+static const uint32_t    tensor_num_channel{ 1 };
+
+// *INDENT-OFF*
+// clang-format off
+
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL,
+    zip(zip(
+            framework::dataset::make("InputInfo", {
+                TensorInfo(correct_input_shape, tensor_num_channel, DataType::F16),         // input supports only QSYMM16
+                TensorInfo(correct_input_shape, tensor_num_channel, correct_input_dt),      // weight supports only QSYMM16
+                TensorInfo(correct_input_shape, tensor_num_channel, correct_input_dt),      // bias supports only S32
+                TensorInfo(TensorShape(15U, 2U, 2U), tensor_num_channel, correct_input_dt), // input supports only up to 2D
+                TensorInfo(correct_input_shape, tensor_num_channel, correct_input_dt),      // weight supports only up to 1D
+                TensorInfo(correct_input_shape, tensor_num_channel, correct_input_dt),      // bias supports only up to 1D
+                TensorInfo(correct_input_shape, tensor_num_channel, correct_input_dt),      // input_shape[0] != weight_shape[0] should fail
+                TensorInfo(correct_input_shape, tensor_num_channel, correct_input_dt),      // weight_shape[0] != bias_shape[0] should fail
+            }),
+            framework::dataset::make("WeightInfo", {
+                TensorInfo(correct_weight_shape, tensor_num_channel, correct_weight_dt),
+                TensorInfo(correct_weight_shape, tensor_num_channel, DataType::F16),
+                TensorInfo(correct_weight_shape, tensor_num_channel, correct_weight_dt),
+                TensorInfo(correct_weight_shape, tensor_num_channel, correct_weight_dt),
+                TensorInfo(TensorShape(15U, 2U), tensor_num_channel, correct_weight_dt),
+                TensorInfo(correct_weight_shape, tensor_num_channel, correct_weight_dt),
+                TensorInfo(TensorShape(14U), tensor_num_channel, correct_weight_dt),
+                TensorInfo(correct_weight_shape, tensor_num_channel, correct_weight_dt),
+            })
+        ),
+        framework::dataset::make("BiasInfo", {
+            TensorInfo(correct_bias_shape, tensor_num_channel, correct_bias_dt),
+            TensorInfo(correct_bias_shape, tensor_num_channel, correct_bias_dt),
+            TensorInfo(correct_bias_shape, tensor_num_channel, DataType::QSYMM16),
+            TensorInfo(correct_bias_shape, tensor_num_channel, correct_bias_dt),
+            TensorInfo(correct_bias_shape, tensor_num_channel, correct_bias_dt),
+            TensorInfo(TensorShape(15U, 2U), tensor_num_channel, correct_bias_dt),
+            TensorInfo(correct_bias_shape, tensor_num_channel, correct_bias_dt),
+            TensorInfo(TensorShape(14U), tensor_num_channel, correct_bias_dt),
+        })
+    ), input_info, weight_info, bias_info)
+{
+    TensorInfo dummy_output{};
+    const Status s = CLQLSTMLayerNormalizationKernel::validate(&input_info, &dummy_output, &weight_info, &bias_info);
+    ARM_COMPUTE_EXPECT(!bool(s), framework::LogLevel::ERRORS);
+}
+
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using CLQLSTMLayerNormalizationFixture = CLQLSTMLayerNormalizationValidationFixture<CLTensor, CLAccessor, CLQLSTMLayerNormalizationKernel, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QSYMM16)
+
+/** These tests target:
+ * - Comparison between the OpenCL kernel and a scalar version of the reference kernel implementing the exact same algorithm
+ * - 1D and 2D input shapes whose first dimension covers the boundary values of the 128-bit vector size (0~3 iterations)
+ * - Weight and bias 1D shapes with the same size as the first dimension of the input shapes
+ * - Quantization scales both greater than and smaller than one
+ * - Input values are noted in the fixture
+ *
+ * What we can't test:
+ * - Since the reference kernel uses the exact same algorithm in the same quantized domain,
+ *   it is hard to fully test whether the algorithm accomplishes what it is supposed to.
+ * - The algorithm is sensitive to the quantization scale, but it is hard to fully test
+ *   this sensitivity for the aforementioned reason.
+ * - Again, it is hard to fully test corner values because the reference kernel and the
+ *   OpenCL kernel share the exact same algorithm.
+ */
+
+constexpr uint32_t qsymm16_per_vector = vector_size_byte / sizeof(int16_t);
+
+#define QSYMM16_DATASET_ITER(num_input_batch, num_iter)                                                               \
+    combine(combine(zip(zip(QLSTMLayerNormShapeDataSet<qsymm16_per_vector, num_input_batch, num_iter>("InputShape"),  \
+                            QLSTMLayerNormShapeDataSet<qsymm16_per_vector, 1, num_iter>("WeightShape")),              \
+                        QLSTMLayerNormShapeDataSet<qsymm16_per_vector, 1, num_iter>("BiasShape")),                    \
+                    framework::dataset::make("DataType", DataType::QSYMM16)),                                         \
+            framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1. / 8192), QuantizationInfo(8192) }))
+
+#define QSYMM16_DATASET_1D \
+    concat(concat(QSYMM16_DATASET_ITER(1, 0), QSYMM16_DATASET_ITER(1, 1)), QSYMM16_DATASET_ITER(1, 2))
+
+#define QSYMM16_DATASET_2D \
+    concat(concat(QSYMM16_DATASET_ITER(3, 0), QSYMM16_DATASET_ITER(3, 1)), QSYMM16_DATASET_ITER(3, 2))
+
+FIXTURE_DATA_TEST_CASE(RandomValue1D, CLQLSTMLayerNormalizationFixture<int16_t>, framework::DatasetMode::ALL, QSYMM16_DATASET_1D)
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_s16);
+}
+
+FIXTURE_DATA_TEST_CASE(RandomValue2D, CLQLSTMLayerNormalizationFixture<int16_t>, framework::DatasetMode::ALL, QSYMM16_DATASET_2D)
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_s16);
+}
+
+#undef QSYMM16_DATASET_ITER
+#undef QSYMM16_DATASET_2D
+#undef QSYMM16_DATASET_1D
+
+TEST_SUITE_END() // QSYMM16
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // QLSTMLayerNormalization
+TEST_SUITE_END() // CL
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/QLSTMLayerNormalization.cpp b/tests/validation/NEON/QLSTMLayerNormalization.cpp
index 8508a6e483..248bf5cf78 100644
--- a/tests/validation/NEON/QLSTMLayerNormalization.cpp
+++ b/tests/validation/NEON/QLSTMLayerNormalization.cpp
@@ -158,7 +158,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL,
 // *INDENT-ON*
 
 template <typename T>
-using NEQLSTMLayerNormalizationFixture = QLSTMLayerNormalizationValidationFixture<Tensor, Accessor, NEQLSTMLayerNormalizationKernel, T>;
+using NEQLSTMLayerNormalizationFixture = NEQLSTMLayerNormalizationValidationFixture<Tensor, Accessor, NEQLSTMLayerNormalizationKernel, T>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QSYMM16)
diff --git a/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h b/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h
index 5d2cd2bd55..72af9d9241 100644
--- a/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h
+++ b/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h
@@ -26,6 +26,9 @@
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
+#ifdef ARM_COMPUTE_CL
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#endif /* ARM_COMPUTE_CL */
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/Globals.h"
@@ -98,6 +101,8 @@ protected:
         }
     }
 
+    virtual void run_target(FunctionType &fn) = 0;
+
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weight_shape, const TensorShape &bias_shape)
     {
         TensorType input = create_tensor<TensorType>(input_shape, _data_type, 1);
@@ -110,9 +115,7 @@
         allocate_tensors({ &input, &weight, &bias, &output });
         fill(AccessorType(input), AccessorType(weight), AccessorType(bias));
 
-        ThreadInfo tinfo;
-        tinfo.cpu_info = &NEScheduler::get().cpu_info();
-        fn.run(fn.window(), tinfo);
+        run_target(fn);
 
         return output;
     }
@@ -136,6 +139,31 @@ protected:
     QuantizationInfo _qinfo{};
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class NEQLSTMLayerNormalizationValidationFixture : public QLSTMLayerNormalizationValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+protected:
+    void run_target(FunctionType &fn) override
+    {
+        ThreadInfo tinfo;
+        tinfo.cpu_info = &NEScheduler::get().cpu_info();
+        fn.run(fn.window(), tinfo);
+    }
+};
+
+#ifdef ARM_COMPUTE_CL
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class CLQLSTMLayerNormalizationValidationFixture : public QLSTMLayerNormalizationValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+protected:
+    void run_target(FunctionType &fn) override
+    {
+        CLScheduler::get().default_init();
+        fn.run(fn.window(), CLScheduler::get().queue());
+    }
+};
+#endif /* ARM_COMPUTE_CL */
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
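
As an aside on the shape datasets above: QLSTMLayerNormShapeDataSet enumerates tensor widths just below, at, and just above each full vector iteration. A minimal standalone sketch, not part of the patch, of the widths generated for QSYMM16 assuming a 128-bit (16-byte) vector:

#include <cstdint>
#include <iostream>

int main()
{
    constexpr uint32_t vector_size_byte   = 16;                                 // 128-bit vector
    constexpr uint32_t qsymm16_per_vector = vector_size_byte / sizeof(int16_t); // 8 int16_t lanes

    // num_iteration == 0 is handled by the template specialisation above,
    // which tests widths 1 and 2 instead of boundary +/- 1.
    std::cout << "iter 0: 1 2\n";

    for (uint32_t num_iteration = 1; num_iteration <= 2; ++num_iteration)
    {
        const uint32_t boundary = qsymm16_per_vector * num_iteration;
        // Widths straddling a whole number of vectorized iterations.
        std::cout << "iter " << num_iteration << ": "
                  << boundary - 1 << " " << boundary << " " << boundary + 1 << "\n";
    }
    return 0;
}

This reproduces the widths exercised by QSYMM16_DATASET_ITER(num_input_batch, 0/1/2): {1, 2}, {7, 8, 9} and {15, 16, 17}, so the kernel is covered below, at, and above each vector-size boundary.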
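The fixture change is a template-method refactor: the shared fixture keeps tensor setup, filling and reference computation, and defers only kernel dispatch to a backend-specific run_target() override. A compilable sketch of the pattern with stand-in names (Kernel, FixtureBase and the subclasses are illustrative, not ACL types):

#include <iostream>

struct Kernel { /* stand-in for an ACL kernel */ };

class FixtureBase
{
public:
    virtual ~FixtureBase() = default;

    void compute_target()
    {
        Kernel fn;      // configure tensors, fill inputs, ...
        run_target(fn); // the only backend-specific step
    }

protected:
    virtual void run_target(Kernel &fn) = 0;
};

class NEFixture : public FixtureBase
{
protected:
    // Real fixture runs fn.run(fn.window(), tinfo) with CPU thread info.
    void run_target(Kernel &) override { std::cout << "CPU dispatch\n"; }
};

class CLFixture : public FixtureBase
{
protected:
    // Real fixture runs fn.run(fn.window(), CLScheduler::get().queue()).
    void run_target(Kernel &) override { std::cout << "CL dispatch\n"; }
};

int main()
{
    NEFixture ne;
    ne.compute_target();
    CLFixture cl;
    cl.compute_target();
    return 0;
}

In the real header only the CL subclass needs CLScheduler, which is why that include and class stay behind #ifdef ARM_COMPUTE_CL.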