author    Michalis Spyrou <michalis.spyrou@arm.com>  2017-06-26 14:18:47 +0100
committer Anthony Barbier <anthony.barbier@arm.com>  2018-09-17 14:16:42 +0100
commit    172e57028ef14f2f8d6c56edc53c5c85f97e07cd (patch)
tree      b3fe8c05902f07fb2381cf6dfd893654c8ccb63f
parent    579c0498e161215be1a36080b0b454e5198a992a (diff)
download  ComputeLibrary-172e57028ef14f2f8d6c56edc53c5c85f97e07cd.tar.gz
COMPMID-425 Port CLBatchnormalization to support QS8/QS16
Change-Id: I46c93305f377666ea0915ff789b7dfdfff596087
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78862
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
-rw-r--r--  arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h |   2
-rw-r--r--  arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h  |   2
-rw-r--r--  src/core/CL/cl_kernels/batchnormalization_layer.cl            |  69
-rw-r--r--  src/core/CL/cl_kernels/fixed_point.h                          |  12
-rw-r--r--  src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp       |  25
-rw-r--r--  tests/validation/CL/BatchNormalizationLayer.cpp               | 228
-rw-r--r--  tests/validation/FixedPoint.h                                 |   2
-rw-r--r--  tests/validation/NEON/BatchNormalizationLayer.cpp             |   9
-rw-r--r--  tests/validation/Reference.cpp                                |   9
-rw-r--r--  tests/validation/TensorOperations.h                           |  20
10 files changed, 333 insertions(+), 45 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index 088853841b..6df7ae4fc7 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -50,7 +50,7 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: F32.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: QS8/QS16/F32.
* @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
* The rest are optional and used for representing batches.
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index d766d1c69c..882786f1d6 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -47,7 +47,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F32.
+ * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F32.
* @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
* @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl
index 13e6702334..cb4d0c8947 100644
--- a/src/core/CL/cl_kernels/batchnormalization_layer.cl
+++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl
@@ -21,11 +21,31 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "helpers.h"
+#if defined(FIXED_POINT_POSITION)
+#include "fixed_point.h"
+
+#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
+#define SUB_OP(a, b) SUB_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
+#define MUL_OP(a, b) MUL_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define INVSQRT_OP(a) INVSQRT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define SQCVT_SAT(a) SQCVT_SAT_OP_EXPAND((a), DATA_TYPE, FIXED_POINT_POSITION)
+
+#else /* FIXED_POINT_POSITION */
+
+#define ADD_OP(a, b) ((a) + (b))
+#define SUB_OP(a, b) ((a) - (b))
+#define MUL_OP(a, b) ((a) * (b))
+#define INVSQRT_OP(a) rsqrt((a))
+#define SQCVT_SAT(a) (a)
+
+#endif /* FIXED_POINT_POSITION */
+
/** Apply batch normalization.
*
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F32
* @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -33,7 +53,7 @@
* @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes)
* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F32
+ * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -41,19 +61,19 @@
* @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: F32
+ * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p input_ptr
* @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes)
* @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor
- * @param[in] var_ptr Pointer to the var tensor. Supported data types: F32
+ * @param[in] var_ptr Pointer to the var tensor. Supported data types: same as @p input_ptr
* @param[in] var_stride_x Stride of the var tensor in X dimension (in bytes)
* @param[in] var_step_x var_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] var_offset_first_element_in_bytes The offset of the first element in the var source tensor
- * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: F32
+ * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: same as @p input_ptr
* @param[in] beta_stride_x Stride of the beta source tensor in X dimension (in bytes)
* @param[in] beta_step_x beta_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] beta_offset_first_element_in_bytes The offset of the first element in the beta source tensor
- * @param[in] gamma_ptr Pointer to the gamma source tensor. Supported data types: F32
+ * @param[in] gamma_ptr Pointer to the gamma source tensor. Supported data types: same as @p input_ptr
* @param[in] gamma_stride_x Stride of the gamma source tensor in X dimension (in bytes)
* @param[in] gamma_step_x gamma_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] gamma_offset_first_element_in_bytes The offset of the first element in the gamma source tensor
@@ -74,26 +94,33 @@ __kernel void batchnormalization_layer(TENSOR3D_DECLARATION(input),
Vector beta = CONVERT_TO_VECTOR_STRUCT(beta);
Vector gamma = CONVERT_TO_VECTOR_STRUCT(gamma);
- float4 _in = 0;
- float4 denominator = 0;
- float4 numerator = 0;
- float4 x_bar = 0;
- float4 gamma_vec = 0;
- float4 beta_vec = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ _in = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ denominator = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ numerator = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ x_bar = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ gamma_vec = 0;
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ beta_vec = 0;
const int current_slice = get_global_id(2);
- _in = vload4(0, (__global float *)in.ptr);
- denominator = *((__global float *)(var.ptr + current_slice * var.stride_x));
- denominator = rsqrt(denominator + epsilon);
+ _in = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr);
+ denominator = *((__global DATA_TYPE *)(var.ptr + current_slice * var.stride_x));
+ denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(epsilon)));
// Calculate x bar and store results
- numerator = *((__global float *)(mean.ptr + current_slice * mean.stride_x));
- numerator = _in - numerator;
- x_bar = numerator * denominator;
+ numerator = *((__global DATA_TYPE *)(mean.ptr + current_slice * mean.stride_x));
+ numerator = SUB_OP(_in, numerator);
+ x_bar = MUL_OP(numerator, denominator);
- gamma_vec = *((__global float *)(gamma.ptr + current_slice * beta.stride_x));
- beta_vec = *((__global float *)(beta.ptr + current_slice * beta.stride_x));
+ gamma_vec = *((__global DATA_TYPE *)(gamma.ptr + current_slice * beta.stride_x));
+ beta_vec = *((__global DATA_TYPE *)(beta.ptr + current_slice * beta.stride_x));
- vstore4(gamma_vec * x_bar + beta_vec, 0, (__global float *)out.ptr);
+ VSTORE(VEC_SIZE)
+ (ADD_OP(MUL_OP(gamma_vec, x_bar), beta_vec), 0, (__global DATA_TYPE *)out.ptr);
}
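
Note on the kernel rewrite above: the float and fixed-point paths now share a single kernel body, with ADD_OP/SUB_OP/MUL_OP/INVSQRT_OP expanding to plain arithmetic for F32 and to the saturating fixed-point primitives when FIXED_POINT_POSITION is defined. A minimal scalar sketch in C++ (illustrative only, not ACL code) of the value each work item computes on every lane of its VEC_SIZE vector:

#include <cmath>

float batch_norm_elem(float x, float mean, float var,
                      float beta, float gamma, float epsilon)
{
    // denominator = INVSQRT_OP(ADD_OP(var, SQCVT_SAT(epsilon)))
    const float denominator = 1.0f / std::sqrt(var + epsilon);
    // x_bar = MUL_OP(SUB_OP(x, mean), denominator)
    const float x_bar = (x - mean) * denominator;
    // stored result: ADD_OP(MUL_OP(gamma, x_bar), beta)
    return gamma * x_bar + beta;
}
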
diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h
index bb534f5a51..4de7fc576b 100644
--- a/src/core/CL/cl_kernels/fixed_point.h
+++ b/src/core/CL/cl_kernels/fixed_point.h
@@ -471,4 +471,16 @@ CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16)
CONVERTQ_UP_IMPL(qs8x16, float16)
CONVERTQ_UP_IMPL(qs16x16, float16)
+#define SQCVT_SAT_IMPL(type) \
+ inline type sqcvt_##type##_sat(float a, int fixed_point_position) \
+ { \
+ return CONVERT_SAT((a * (1 << fixed_point_position) + ((a < 0) ? -0.5f : 0.5f)), type); \
+ }
+
+SQCVT_SAT_IMPL(qs8)
+SQCVT_SAT_IMPL(qs16)
+
+#define SQCVT_SAT_OP_EXPAND_STR(a, type, position) sqcvt_##type##_sat((a), (position))
+#define SQCVT_SAT_OP_EXPAND(a, type, position) SQCVT_SAT_OP_EXPAND_STR((a), type, position)
+
#endif // ARM_COMPUTE_FIXED_POINT_H
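
The new SQCVT_SAT_IMPL macro generates the saturating float-to-fixed-point conversion used to feed epsilon into the fixed-point path: scale by 2^fixed_point_position, round half away from zero, then saturate to the target type. A stand-alone C++ sketch of the same arithmetic (hypothetical helper, qs8 case only):

#include <algorithm>
#include <cstdint>

int8_t sqcvt_qs8_sat_sketch(float a, int fixed_point_position)
{
    // Scale into the Q format and round half away from zero.
    const float scaled = a * static_cast<float>(1 << fixed_point_position)
                       + ((a < 0.f) ? -0.5f : 0.5f);
    // CONVERT_SAT clamps to the representable range of the target type.
    const float clamped = std::min(std::max(scaled, -128.f), 127.f);
    return static_cast<int8_t>(clamped);
}

For example, with fixed_point_position = 5, epsilon = 0.1f becomes raw value 3, i.e. 0.09375 in QS8.
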
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 85d8ab7cb4..02bf35a860 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -26,12 +26,15 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "support/ToolchainSupport.h"
+
using namespace arm_compute;
CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel()
@@ -42,7 +45,7 @@ CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel()
void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma,
float epsilon)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
// Output tensor auto initialization if not yet initialized
@@ -54,10 +57,6 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var, beta, gamma);
ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0));
- // Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
-
_input = input;
_output = output;
_mean = mean;
@@ -66,17 +65,25 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso
_gamma = gamma;
_epsilon = epsilon;
+ const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
+
+ // Set build options
+ std::set<std::string> build_opts;
+ build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ if(is_data_type_fixed_point(input->info()->data_type()))
+ {
+ build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
+ }
+
// Create kernel
- std::string kernel_name = "batchnormalization_layer";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts));
// Set kernel static arguments
unsigned int idx = 2 * num_arguments_per_3D_tensor() + 4 * num_arguments_per_1D_tensor(); // Skip the input and output parameters
_kernel.setArg<cl_float>(idx++, _epsilon);
// Configure kernel window
- const unsigned int num_elems_processed_per_iteration = 4;
-
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
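
The hard-coded step of 4 elements (one vload4 of F32) is replaced by a width derived from a fixed 16-byte access per work item, so one kernel configuration covers all element sizes. A sketch of the arithmetic behind num_elems_processed_per_iteration:

// 16 bytes per iteration divided by the element size in bytes:
//   F32  -> 16 / 4 =  4 elements (matches the old vload4 path)
//   QS16 -> 16 / 2 =  8 elements
//   QS8  -> 16 / 1 = 16 elements
unsigned int num_elems_per_iteration(unsigned int element_size_bytes)
{
    return 16 / element_size_bytes;
}
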
diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp
new file mode 100644
index 0000000000..9b9df2e902
--- /dev/null
+++ b/tests/validation/CL/BatchNormalizationLayer.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "Globals.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "dataset/BatchNormalizationLayerDataset.h"
+#include "tests/validation/Helpers.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <random>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against floating point implementation's output */
+const float tolerance_qs8 = 3; /**< Tolerance value for comparing reference's output against quantized implementation's output */
+const float tolerance_qs16 = 6; /**< Tolerance value for comparing reference's output against quantized implementation's output */
+
+/** Compute CL batch normalization function.
+ *
+ * @param[in] shape0               Shape of the input and output tensors.
+ * @param[in] shape1               Shape of the mean, var, beta and gamma tensors.
+ * @param[in] dt                   Data type of input and output tensors.
+ * @param[in] epsilon              Small value added to the variance for numerical stability.
+ * @param[in] fixed_point_position (Optional) Fixed point position for fixed point data types.
+ *
+ * @return Computed output tensor.
+ */
+CLTensor compute_reference_batch_normalization_layer(const TensorShape &shape0, const TensorShape &shape1, DataType dt, float epsilon, int fixed_point_position = 0)
+{
+ // Create tensors
+ CLTensor src = create_tensor<CLTensor>(shape0, dt, 1, fixed_point_position);
+ CLTensor dst = create_tensor<CLTensor>(shape0, dt, 1, fixed_point_position);
+ CLTensor mean = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+ CLTensor var = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+ CLTensor beta = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+ CLTensor gamma = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position);
+
+ // Create and configure function
+ CLBatchNormalizationLayer norm;
+ norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+ mean.allocator()->allocate();
+ var.allocator()->allocate();
+ beta.allocator()->allocate();
+ gamma.allocator()->allocate();
+
+ BOOST_TEST(!src.info()->is_resizable());
+ BOOST_TEST(!dst.info()->is_resizable());
+ BOOST_TEST(!mean.info()->is_resizable());
+ BOOST_TEST(!var.info()->is_resizable());
+ BOOST_TEST(!beta.info()->is_resizable());
+ BOOST_TEST(!gamma.info()->is_resizable());
+
+ // Fill tensors
+ if(dt == DataType::F32)
+ {
+ float min_bound = 0.f;
+ float max_bound = 0.f;
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<float>();
+ std::uniform_real_distribution<> distribution(min_bound, max_bound);
+ std::uniform_real_distribution<> distribution_var(0, max_bound);
+ library->fill(CLAccessor(src), distribution, 0);
+ library->fill(CLAccessor(mean), distribution, 1);
+ library->fill(CLAccessor(var), distribution_var, 0);
+ library->fill(CLAccessor(beta), distribution, 3);
+ library->fill(CLAccessor(gamma), distribution, 4);
+ }
+ else
+ {
+ int min_bound = 0;
+ int max_bound = 0;
+ if(dt == DataType::QS8)
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ }
+ else
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position);
+ }
+ std::uniform_int_distribution<> distribution(min_bound, max_bound);
+ std::uniform_int_distribution<> distribution_var(0, max_bound);
+ library->fill(CLAccessor(src), distribution, 0);
+ library->fill(CLAccessor(mean), distribution, 1);
+ library->fill(CLAccessor(var), distribution_var, 0);
+ library->fill(CLAccessor(beta), distribution, 3);
+ library->fill(CLAccessor(gamma), distribution, 4);
+ }
+
+ // Compute function
+ norm.run();
+
+ return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(BatchNormalizationLayer)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16, DataType::F32 }), obj, dt)
+{
+ // Set a fixed point position only for fixed point data types
+ int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
+
+ // Create tensors
+ CLTensor src = create_tensor<CLTensor>(obj.shape0, dt, 1, fixed_point_position);
+ CLTensor dst = create_tensor<CLTensor>(obj.shape0, dt, 1, fixed_point_position);
+ CLTensor mean = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+ CLTensor var = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+ CLTensor beta = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+ CLTensor gamma = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position);
+
+ BOOST_TEST(src.info()->is_resizable());
+ BOOST_TEST(dst.info()->is_resizable());
+ BOOST_TEST(mean.info()->is_resizable());
+ BOOST_TEST(var.info()->is_resizable());
+ BOOST_TEST(beta.info()->is_resizable());
+ BOOST_TEST(gamma.info()->is_resizable());
+
+ // Create and configure function
+ CLBatchNormalizationLayer norm;
+ norm.configure(&src, &dst, &mean, &var, &beta, &gamma, obj.epsilon);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(obj.shape0);
+ const ValidRegion valid_region_vec = shape_to_valid_region(obj.shape1);
+ validate(src.info()->valid_region(), valid_region);
+ validate(dst.info()->valid_region(), valid_region);
+ validate(mean.info()->valid_region(), valid_region_vec);
+ validate(var.info()->valid_region(), valid_region_vec);
+ validate(beta.info()->valid_region(), valid_region_vec);
+ validate(gamma.info()->valid_region(), valid_region_vec);
+}
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(Random,
+ RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::F32),
+ obj, dt)
+{
+ // Compute function
+ CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_f, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(Quantized)
+
+BOOST_AUTO_TEST_SUITE(QS8)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(Random,
+ RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 6),
+ obj, dt, fixed_point_position)
+{
+ // Compute function
+ CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_qs8, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(QS16)
+BOOST_DATA_TEST_CASE(Random,
+ RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::QS16) * boost::unit_test::data::xrange(1, 14),
+ obj, dt, fixed_point_position)
+{
+ // Compute function
+ CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+ // Validate output
+ validate(CLAccessor(dst), ref_dst, tolerance_qs16, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* DOXYGEN_SKIP_THIS */
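
The quantized cases sweep fixed point positions 1-5 for QS8 (xrange(1, 6)) and 1-13 for QS16 (xrange(1, 14)), i.e. every position that keeps at least two integer bits. A C++ sketch of the range/precision trade-off this exercises (assuming QSn is n-bit two's complement with 'position' fractional bits):

#include <cstdio>

int main()
{
    for(int position = 1; position < 6; ++position)
    {
        const float resolution = 1.0f / (1 << position);   // smallest representable step
        const float max_value  = 127.0f / (1 << position); // largest QS8 value
        std::printf("QS8 position %d: resolution %.5f, max %.5f\n", position, resolution, max_value);
    }
    return 0;
}

Higher positions buy precision at the cost of range, which is why the quantized tolerances (3 for QS8, 6 for QS16) are looser than tolerance_f.
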
diff --git a/tests/validation/FixedPoint.h b/tests/validation/FixedPoint.h
index 261fcd6df6..ab6f14a49b 100644
--- a/tests/validation/FixedPoint.h
+++ b/tests/validation/FixedPoint.h
@@ -768,7 +768,7 @@ struct functions
x2 = shift_right(mul(x2, three_minus_dx), 1);
}
- return (shift < 0) ? shift_left(x2, -shift >> 1) : shift_right(x2, shift >> 1);
+ return (shift < 0) ? shift_left(x2, (-shift) >> 1) : shift_right(x2, shift >> 1);
}
/** Calculate the hyperbolic tangent of a fixed point number
*
diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp
index 38e3751db4..d825c889b6 100644
--- a/tests/validation/NEON/BatchNormalizationLayer.cpp
+++ b/tests/validation/NEON/BatchNormalizationLayer.cpp
@@ -100,7 +100,14 @@ Tensor compute_reference_batch_normalization_layer(const TensorShape &shape0, co
{
int min_bound = 0;
int max_bound = 0;
- std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ if(dt == DataType::QS8)
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ }
+ else
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position);
+ }
std::uniform_int_distribution<> distribution(min_bound, max_bound);
std::uniform_int_distribution<> distribution_var(0, max_bound);
library->fill(NEAccessor(src), distribution, 0);
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index 65705b17de..acf010e7b7 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -505,7 +505,14 @@ RawTensor Reference::compute_reference_batch_normalization_layer(const TensorSha
{
int min_bound = 0;
int max_bound = 0;
- std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ if(dt == DataType::QS8)
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+ }
+ else
+ {
+ std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position);
+ }
std::uniform_int_distribution<> distribution(min_bound, max_bound);
std::uniform_int_distribution<> distribution_var(0, max_bound);
library->fill(ref_src, distribution, 0);
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 27c50cf6d2..9e6f5cf5d1 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -974,17 +974,17 @@ void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor
for(int l = 0; l < cols; ++l)
{
const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
- fixed_point_arithmetic::fixed_point<T> in_qs8(in[pos], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> var_qs8(var[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> mean_qs8(mean[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> beta_qs8(beta[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> gamma_qs8(gamma[i], fixed_point_position, true);
- fixed_point_arithmetic::fixed_point<T> epsilon_qs8(epsilon, fixed_point_position);
-
- auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs8 + epsilon_qs8);
- auto numerator = in_qs8 - mean_qs8;
+ fixed_point_arithmetic::fixed_point<T> in_qs(in[pos], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> var_qs(var[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> mean_qs(mean[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> beta_qs(beta[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> gamma_qs(gamma[i], fixed_point_position, true);
+ fixed_point_arithmetic::fixed_point<T> epsilon_qs(epsilon, fixed_point_position);
+
+ auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs + epsilon_qs);
+ auto numerator = in_qs - mean_qs;
auto x_bar = numerator * denominator;
- x_bar = beta_qs8 + x_bar * gamma_qs8;
+ x_bar = beta_qs + x_bar * gamma_qs;
out[pos] = x_bar.raw();
}
}
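
The reference implementation above mirrors the kernel using the test framework's fixed_point<T> wrapper; renaming the _qs8 locals to _qs reflects that T may now be int8_t or int16_t. A toy C++ sketch (hypothetical type, not the ACL class) of the two construction modes the code appears to rely on, where the trailing true treats the argument as an already-quantized raw value and the two-argument form quantizes a real number:

#include <cstdint>

template <typename T>
struct fixed_point_sketch
{
    T   raw;      // stored integer in Q format
    int position; // number of fractional bits

    // Construct from an integer that is already in Q format (the 'true' flag).
    fixed_point_sketch(T raw_value, int pos, bool /*is_raw*/)
        : raw(raw_value), position(pos) {}

    // Construct by quantizing a real value (used for epsilon).
    fixed_point_sketch(float value, int pos)
        : raw(static_cast<T>(value * (1 << pos))), position(pos) {}
};
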