author    Michalis Spyrou <michalis.spyrou@arm.com>  2017-06-26 14:18:47 +0100
committer Anthony Barbier <anthony.barbier@arm.com>  2018-09-17 14:16:42 +0100
commit    172e57028ef14f2f8d6c56edc53c5c85f97e07cd (patch)
tree      b3fe8c05902f07fb2381cf6dfd893654c8ccb63f
parent    579c0498e161215be1a36080b0b454e5198a992a (diff)
download  ComputeLibrary-172e57028ef14f2f8d6c56edc53c5c85f97e07cd.tar.gz
COMPMID-425 Port CLBatchnormalization to support QS8/QS16
Change-Id: I46c93305f377666ea0915ff789b7dfdfff596087
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78862
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
-rw-r--r--  arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h |   2
-rw-r--r--  arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h  |   2
-rw-r--r--  src/core/CL/cl_kernels/batchnormalization_layer.cl            |  69
-rw-r--r--  src/core/CL/cl_kernels/fixed_point.h                          |  12
-rw-r--r--  src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp       |  25
-rw-r--r--  tests/validation/CL/BatchNormalizationLayer.cpp               | 228
-rw-r--r--  tests/validation/FixedPoint.h                                 |   2
-rw-r--r--  tests/validation/NEON/BatchNormalizationLayer.cpp             |   9
-rw-r--r--  tests/validation/Reference.cpp                                |   9
-rw-r--r--  tests/validation/TensorOperations.h                           |  20
10 files changed, 333 insertions(+), 45 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index 088853841b..6df7ae4fc7 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -50,7 +50,7 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: F32.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: QS8/QS16/F32.
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
* The rest are optional and used for representing batches.
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index d766d1c69c..882786f1d6 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -47,7 +47,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F32.
+ * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F32.
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl
index 13e6702334..cb4d0c8947 100644
--- a/src/core/CL/cl_kernels/batchnormalization_layer.cl
+++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl
@@ -21,11 +21,31 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "helpers.h"
+#if defined(FIXED_POINT_POSITION)
+#include "fixed_point.h"
+
+#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
+#define SUB_OP(a, b) SUB_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
+#define MUL_OP(a, b) MUL_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define INVSQRT_OP(a) INVSQRT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define SQCVT_SAT(a) SQCVT_SAT_OP_EXPAND((a), DATA_TYPE, FIXED_POINT_POSITION)
+
+#else /* FIXED_POINT_POSITION */
+
+#define ADD_OP(a, b) ((a) + (b))
+#define SUB_OP(a, b) ((a) - (b))
+#define MUL_OP(a, b) ((a) * (b))
+#define INVSQRT_OP(a) rsqrt((a))
+#define SQCVT_SAT(a) (a)
+
+#endif /* FIXED_POINT_POSITION */
+
/** Apply batch normalization.
*
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F32
* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -33,7 +53,7 @@
* @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes)
* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F32
+ * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -41,19 +61,19 @@
* @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: F32
+ * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p input_ptr
* @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes)
* @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor
- * @param[in] var_ptr Pointer to the var tensor. Supported data types: F32
+ * @param[in] var_ptr Pointer to the var tensor. Supported data types: same as @p input_ptr
* @param[in] var_stride_x Stride of the var tensor in X dimension (in bytes)
* @param[in] var_step_x var_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] var_offset_first_element_in_bytes The offset of the first element in the var source tensor
- * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: F32
+ * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: same as @p input_ptr
* @param[in] beta_stride_x Stride of the beta source tensor in X dimension (in bytes)
* @param[in] beta_step_x beta_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] beta_offset_first_element_in_bytes The offset of the first element in the beta source tensor
- * @param[in] gamma_ptr Pointer to the gamma source tensor. Supported data types: F32
+ * @param[in] gamma_ptr Pointer to the gamma source tensor. Supported data types: same as @p input_ptr
* @param[in] gamma_stride_x Stride of the gamma source tensor in X dimension (in bytes)
* @param[in] gamma_step_x gamma_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] gamma_offset_first_element_in_bytes The offset of the first element in the gamma source tensor
@@ -74,26 +94,33 @@ __kernel void batchnormalization_layer(TENSOR3D_DECLARATION(input),
Vector beta = CONVERT_TO_VECTOR_STRUCT(beta);
Vector gamma = CONVERT_TO_VECTOR_STRUCT(gamma);
- float4 _in = 0;
- float4 denominator = 0;
- float4 numerator = 0;
- float4 x_bar = 0;
- float4 gamma_vec = 0;
- float4 beta_vec = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ _in = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ denominator = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ numerator = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ x_bar = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ gamma_vec = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ beta_vec = 0;
const int current_slice = get_global_id(2);
- _in = vload4(0, (__global float *)in.ptr);
- denominator = *((__global float *)(var.ptr + current_slice * var.stride_x));
- denominator = rsqrt(denominator + epsilon);
+ _in = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr);
+ denominator = *((__global DATA_TYPE *)(var.ptr + current_slice * var.stride_x));
+ denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(epsilon)));
// Calculate x bar and store results
- numerator = *((__global float *)(mean.ptr + current_slice * mean.stride_x));
- numerator = _in - numerator;
- x_bar = numerator * denominator;
+ numerator = *((__global DATA_TYPE *)(mean.ptr + current_slice * mean.stride_x));
+ numerator = SUB_OP(_in, numerator);
+ x_bar = MUL_OP(numerator, denominator);
- gamma_vec = *((__global float *)(gamma.ptr + current_slice * beta.stride_x));
- beta_vec = *((__global float *)(beta.ptr + current_slice * beta.stride_x));
+ gamma_vec = *((__global DATA_TYPE *)(gamma.ptr + current_slice * beta.stride_x));
+ beta_vec = *((__global DATA_TYPE *)(beta.ptr + current_slice * beta.stride_x));
- vstore4(gamma_vec * x_bar + beta_vec, 0, (__global float *)out.ptr);
+ VSTORE(VEC_SIZE)
+ (ADD_OP(MUL_OP(gamma_vec, x_bar), beta_vec), 0, (__global DATA_TYPE *)out.ptr);
}
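
Note on the kernel rewrite above: the float and fixed-point paths now share a single kernel body, with ADD_OP/SUB_OP/MUL_OP/INVSQRT_OP expanding to plain arithmetic for F32 and to the saturating fixed-point primitives when FIXED_POINT_POSITION is defined. A minimal scalar sketch in C++ (illustrative only, not ACL code) of the value each work item computes on every lane of its VEC_SIZE vector:

#include <cmath>

float batch_norm_elem(float x, float mean, float var,
                      float beta, float gamma, float epsilon)
{
    // denominator = INVSQRT_OP(ADD_OP(var, SQCVT_SAT(epsilon)))
    const float denominator = 1.0f / std::sqrt(var + epsilon);
    // x_bar = MUL_OP(SUB_OP(x, mean), denominator)
    const float x_bar = (x - mean) * denominator;
    // stored result: ADD_OP(MUL_OP(gamma, x_bar), beta)
    return gamma * x_bar + beta;
}
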
diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h
index bb534f5a51..4de7fc576b 100644
--- a/src/core/CL/cl_kernels/fixed_point.h
+++ b/src/core/CL/cl_kernels/fixed_point.h
@@ -471,4 +471,16 @@ CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16)
CONVERTQ_UP_IMPL(qs8x16, float16)
CONVERTQ_UP_IMPL(qs16x16, float16)
+#define SQCVT_SAT_IMPL(type) \
+ inline type sqcvt_##type##_sat(float a, int fixed_point_position) \
+ { \
+ return CONVERT_SAT((a * (1 << fixed_point_position) + ((a < 0) ? -0.5f : 0.5f)), type); \
+ }
+
+SQCVT_SAT_IMPL(qs8)
+SQCVT_SAT_IMPL(qs16)
+
+#define SQCVT_SAT_OP_EXPAND_STR(a, type, position) sqcvt_##type##_sat((a), (position))
+#define SQCVT_SAT_OP_EXPAND(a, type, position) SQCVT_SAT_OP_EXPAND_STR((a), type, position)
+
#endif // ARM_COMPUTE_FIXED_POINT_H
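
The new SQCVT_SAT_IMPL macro generates the saturating float-to-fixed-point conversion used to feed epsilon into the fixed-point path: scale by 2^fixed_point_position, round half away from zero, then saturate to the target type. A stand-alone C++ sketch of the same arithmetic (hypothetical helper, qs8 case only):

#include <algorithm>
#include <cstdint>

int8_t sqcvt_qs8_sat_sketch(float a, int fixed_point_position)
{
    // Scale into the Q format and round half away from zero.
    const float scaled = a * static_cast<float>(1 << fixed_point_position)
                       + ((a < 0.f) ? -0.5f : 0.5f);
    // CONVERT_SAT clamps to the representable range of the target type.
    const float clamped = std::min(std::max(scaled, -128.f), 127.f);
    return static_cast<int8_t>(clamped);
}

For example, with fixed_point_position = 5, epsilon = 0.1f becomes raw value 3, i.e. 0.09375 in QS8.
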
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 85d8ab7cb4..02bf35a860 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -26,12 +26,15 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "support/ToolchainSupport.h"
+
using namespace arm_compute;
CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel()
@@ -42,7 +45,7 @@ CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel()
void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma,
float epsilon)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
// Output tensor auto initialization if not yet initialized
@@ -54,10 +57,6 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var, beta, gamma);
ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0));
- // Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
-
_input = input;
_output = output;
_mean = mean;
@@ -66,17 +65,25 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso
_gamma = gamma;
_epsilon = epsilon;
+ const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
+
+ // Set build options
+ std::set<std::string> build_opts;
+ build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ if(is_data_type_fixed_point(input->info()->data_type()))
+ {
+ build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
+ }
+
// Create kernel
- std::string kernel_name = "batchnormalization_layer";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts));
// Set kernel static arguments
unsigned int idx = 2 * num_arguments_per_3D_tensor() + 4 * num_arguments_per_1D_tensor(); // Skip the input and output parameters
_kernel.setArg<cl_float>(idx++, _epsilon);
// Configure kernel window
- const unsigned int num_elems_processed_per_iteration = 4;
-
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
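
The hard-coded step of 4 elements (one vload4 of F32) is replaced by a width derived from a fixed 16-byte access per work item, so one kernel configuration covers all element sizes. A sketch of the arithmetic behind num_elems_processed_per_iteration:

// 16 bytes per iteration divided by the element size in bytes:
//   F32  -> 16 / 4 =  4 elements (matches the old vload4 path)
//   QS16 -> 16 / 2 =  8 elements
//   QS8  -> 16 / 1 = 16 elements
unsigned int num_elems_per_iteration(unsigned int element_size_bytes)
{
    return 16 / element_size_bytes;
}
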
diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp
new file mode 100644
index 0000000000..9b9df2e902
--- /dev/null
+++ b/tests/validation/CL/BatchNormalizationLayer.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "Globals.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "dataset/BatchNormalizationLayerDataset.h"
+#include "tests/validation/Helpers.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <random>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against floating point implementation's output */
+const float tolerance_qs8 = 3; /**< Tolerance value for comparing reference's output against quantized implementation's output */
+const float tolerance_qs16 = 6; /**< Tolerance value for comparing reference's output against quantized implementation's output */
+
+/** Compute CL batch normalization function.
+ *
+ * @param[in] shape0               Shape of the input and output tensors.
+ * @param[in] shape1               Shape of the mean, var, beta and gamma tensors.
+ * @param[in] dt                   Data type of input and output tensors.
+ * @param[in] epsilon              Small value added to the variance for numerical stability.
+ * @param[in] fixed_point_position (Optional) Fixed point position for fixed point data types.
+ *
+ * @return Computed output tensor.
+ */
+CLTensor compute_reference_batch_normalization_layer(const TensorShape &shape0, const TensorShape &shape1, DataType dt, float epsilon, int fixed_point_position = 0)
+{
+ // Create tensors
+ CLTensor src = create_tensor<CLTensor>(shape0, dt, 1, fixed_point_position);
+ CLTensor dst = create_tensor<CLTensor>(shape0, dt, 1, fixed_point_position);
+ CLTensor mean = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+ CLTensor var = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+ CLTensor beta = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+ CLTensor gamma = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+
+ // Create and configure function
+ CLBatchNormalizationLayer norm;
+ norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+ mean.allocator()->allocate();
+ var.allocator()->allocate();
+ beta.allocator()->allocate();
+ gamma.allocator()->allocate();
+
+ BOOST_TEST(!src.info()->is_resizable());
+ BOOST_TEST(!dst.info()->is_resizable());
+ BOOST_TEST(!mean.info()->is_resizable());
+ BOOST_TEST(!var.info()->is_resizable());
+ BOOST_TEST(!beta.info()->is_resizable());
+ BOOST_TEST(!gamma.info()->is_resizable());
+
+ // Fill tensors
+ if(dt == DataType::F32)
+ {
+ float min_bound = 0.f;
+ float max_bound = 0.f;
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<float>();
+ std::uniform_real_distribution<> distribution(min_bound, max_bound);
+ std::uniform_real_distribution<> distribution_var(0, max_bound);
+ library->fill(CLAccessor(src), distribution, 0);
+ library->fill(CLAccessor(mean), distribution, 1);
+ library->fill(CLAccessor(var), distribution_var, 0);
+ library->fill(CLAccessor(beta), distribution, 3);
+ library->fill(CLAccessor(gamma), distribution, 4);
+ }
+ else
+ {
+ int min_bound = 0;
+ int max_bound = 0;
+ if(dt == DataType::QS8)
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ }
+ else
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position);
+ }
+ std::uniform_int_distribution<> distribution(min_bound, max_bound);
+ std::uniform_int_distribution<> distribution_var(0, max_bound);
+ library->fill(CLAccessor(src), distribution, 0);
+ library->fill(CLAccessor(mean), distribution, 1);
+ library->fill(CLAccessor(var), distribution_var, 0);
+ library->fill(CLAccessor(beta), distribution, 3);
+ library->fill(CLAccessor(gamma), distribution, 4);
+ }
+
+ // Compute function
+ norm.run();
+
+ return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(BatchNormalizationLayer)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16, DataType::F32 }), obj, dt)
+{
+ // Set a fixed point position only for fixed point data types
+ int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
+
+ // Create tensors
+ CLTensor src = create_tensor<CLTensor>(obj.shape0, dt, 1, fixed_point_position);
+ CLTensor dst = create_tensor<CLTensor>(obj.shape0, dt, 1, fixed_point_position);
+ CLTensor mean = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+ CLTensor var = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+ CLTensor beta = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+ CLTensor gamma = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+
+ BOOST_TEST(src.info()->is_resizable());
+ BOOST_TEST(dst.info()->is_resizable());
+ BOOST_TEST(mean.info()->is_resizable());
+ BOOST_TEST(var.info()->is_resizable());
+ BOOST_TEST(beta.info()->is_resizable());
+ BOOST_TEST(gamma.info()->is_resizable());
+
+ // Create and configure function
+ CLBatchNormalizationLayer norm;
+ norm.configure(&src, &dst, &mean, &var, &beta, &gamma, obj.epsilon);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(obj.shape0);
+ const ValidRegion valid_region_vec = shape_to_valid_region(obj.shape1);
+ validate(src.info()->valid_region(), valid_region);
+ validate(dst.info()->valid_region(), valid_region);
+ validate(mean.info()->valid_region(), valid_region_vec);
+ validate(var.info()->valid_region(), valid_region_vec);
+ validate(beta.info()->valid_region(), valid_region_vec);
+ validate(gamma.info()->valid_region(), valid_region_vec);
+}
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(Random,
+ RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::F32),
+ obj, dt)
+{
+ // Compute function
+ CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_f, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(Quantized)
+
+BOOST_AUTO_TEST_SUITE(QS8)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(Random,
+ RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 6),
+ obj, dt, fixed_point_position)
+{
+ // Compute function
+ CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_qs8, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(QS16)
+BOOST_DATA_TEST_CASE(Random,
+ RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::QS16) * boost::unit_test::data::xrange(1, 14),
+ obj, dt, fixed_point_position)
+{
+ // Compute function
+ CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_qs16, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* DOXYGEN_SKIP_THIS */
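
The quantized cases sweep fixed point positions 1-5 for QS8 (xrange(1, 6)) and 1-13 for QS16 (xrange(1, 14)), i.e. every position that keeps at least two integer bits. A C++ sketch of the range/precision trade-off this exercises (assuming QSn is n-bit two's complement with 'position' fractional bits):

#include <cstdio>

int main()
{
    for(int position = 1; position < 6; ++position)
    {
        const float resolution = 1.0f / (1 << position);   // smallest representable step
        const float max_value  = 127.0f / (1 << position); // largest QS8 value
        std::printf("QS8 position %d: resolution %.5f, max %.5f\n", position, resolution, max_value);
    }
    return 0;
}

Higher positions buy precision at the cost of range, which is why the quantized tolerances (3 for QS8, 6 for QS16) are looser than tolerance_f.
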
diff --git a/tests/validation/FixedPoint.h b/tests/validation/FixedPoint.h
index 261fcd6df6..ab6f14a49b 100644
--- a/tests/validation/FixedPoint.h
+++ b/tests/validation/FixedPoint.h
@@ -768,7 +768,7 @@ struct functions
x2 = shift_right(mul(x2, three_minus_dx), 1);
}
- return (shift < 0) ? shift_left(x2, -shift >> 1) : shift_right(x2, shift >> 1);
+ return (shift < 0) ? shift_left(x2, (-shift) >> 1) : shift_right(x2, shift >> 1);
}
/** Calculate the hyperbolic tangent of a fixed point number
*
diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp
index 38e3751db4..d825c889b6 100644
--- a/tests/validation/NEON/BatchNormalizationLayer.cpp
+++ b/tests/validation/NEON/BatchNormalizationLayer.cpp
@@ -100,7 +100,14 @@ Tensor compute_reference_batch_normalization_layer(const TensorShape &shape0, co
{
int min_bound = 0;
int max_bound = 0;
- std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ if(dt == DataType::QS8)
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ }
+ else
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position);
+ }
std::uniform_int_distribution<> distribution(min_bound, max_bound);
std::uniform_int_distribution<> distribution_var(0, max_bound);
library->fill(NEAccessor(src), distribution, 0);
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index 65705b17de..acf010e7b7 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -505,7 +505,14 @@ RawTensor Reference::compute_reference_batch_normalization_layer(const TensorSha
{
int min_bound = 0;
int max_bound = 0;
- std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ if(dt == DataType::QS8)
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ }
+ else
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position);
+ }
std::uniform_int_distribution<> distribution(min_bound, max_bound);
std::uniform_int_distribution<> distribution_var(0, max_bound);
library->fill(ref_src, distribution, 0);
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 27c50cf6d2..9e6f5cf5d1 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -974,17 +974,17 @@ void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor
for(int l = 0; l < cols; ++l)
{
const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
- fixed_point_arithmetic::fixed_point<T> in_qs8(in[pos], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> var_qs8(var[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> mean_qs8(mean[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> beta_qs8(beta[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> gamma_qs8(gamma[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> epsilon_qs8(epsilon, fixed_point_position);
-
- auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs8 + epsilon_qs8);
- auto numerator = in_qs8 - mean_qs8;
+ fixed_point_arithmetic::fixed_point<T> in_qs(in[pos], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> var_qs(var[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> mean_qs(mean[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> beta_qs(beta[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> gamma_qs(gamma[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> epsilon_qs(epsilon, fixed_point_position);
+
+ auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs + epsilon_qs);
+ auto numerator = in_qs - mean_qs;
auto x_bar = numerator * denominator;
- x_bar = beta_qs8 + x_bar * gamma_qs8;
+ x_bar = beta_qs + x_bar * gamma_qs;
out[pos] = x_bar.raw();
}
}
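
The reference implementation above mirrors the kernel using the test framework's fixed_point<T> wrapper; renaming the _qs8 locals to _qs reflects that T may now be int8_t or int16_t. A toy C++ sketch (hypothetical type, not the ACL class) of the two construction modes the code appears to rely on, where the trailing true treats the argument as an already-quantized raw value and the two-argument form quantizes a real number:

#include <cstdint>

template <typename T>
struct fixed_point_sketch
{
    T   raw;      // stored integer in Q format
    int position; // number of fractional bits

    // Construct from an integer that is already in Q format (the 'true' flag).
    fixed_point_sketch(T raw_value, int pos, bool /*is_raw*/)
        : raw(raw_value), position(pos) {}

    // Construct by quantizing a real value (used for epsilon).
    fixed_point_sketch(float value, int pos)
        : raw(static_cast<T>(value * (1 << pos))), position(pos) {}
};
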