diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2017-06-26 14:18:47 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:16:42 +0100 |
commit | 172e57028ef14f2f8d6c56edc53c5c85f97e07cd (patch) | |
tree | b3fe8c05902f07fb2381cf6dfd893654c8ccb63f | |
parent | 579c0498e161215be1a36080b0b454e5198a992a (diff) | |
download | ComputeLibrary-172e57028ef14f2f8d6c56edc53c5c85f97e07cd.tar.gz |
COMPMID-425 Port CLBatchNormalization to support QS8/QS16
Change-Id: I46c93305f377666ea0915ff789b7dfdfff596087
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78862
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
-rw-r--r-- | arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h | 2 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h | 2 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/batchnormalization_layer.cl | 69 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/fixed_point.h | 12 | ||||
-rw-r--r-- | src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp | 25 | ||||
-rw-r--r-- | tests/validation/CL/BatchNormalizationLayer.cpp | 228 | ||||
-rw-r--r-- | tests/validation/FixedPoint.h | 2 | ||||
-rw-r--r-- | tests/validation/NEON/BatchNormalizationLayer.cpp | 9 | ||||
-rw-r--r-- | tests/validation/Reference.cpp | 9 | ||||
-rw-r--r-- | tests/validation/TensorOperations.h | 20 |
10 files changed, 333 insertions, 45 deletions
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h index 088853841b..6df7ae4fc7 100644 --- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -50,7 +50,7 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: F32. + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: QS8/QS16/F32. * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * The rest are optional and used for representing batches. * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h index d766d1c69c..882786f1d6 100644 --- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -47,7 +47,7 @@ public: /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F32. + * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F32. * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] gamma Gamma values tensor. 
1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl index 13e6702334..cb4d0c8947 100644 --- a/src/core/CL/cl_kernels/batchnormalization_layer.cl +++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl @@ -21,11 +21,31 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "helpers.h" +#if defined(FIXED_POINT_POSITION) +#include "fixed_point.h" + +#define ADD_OP(a, b) ADD_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE) +#define SUB_OP(a, b) SUB_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE) +#define MUL_OP(a, b) MUL_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) +#define INVSQRT_OP(a) INVSQRT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) +#define SQCVT_SAT(a) SQCVT_SAT_OP_EXPAND((a), DATA_TYPE, FIXED_POINT_POSITION) + +#else /* FIXED_POINT_POSITION */ + +#define ADD_OP(a, b) ((a) + (b)) +#define SUB_OP(a, b) ((a) - (b)) +#define MUL_OP(a, b) ((a) * (b)) +#define INVSQRT_OP(a) rsqrt((a)) +#define SQCVT_SAT(a) (a) + +#endif /* FIXED_POINT_POSITION */ + /** Apply batch normalization. * - * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F32 + * @param[in] input_ptr Pointer to the first source tensor. 
Supported data types: QS8/QS16/F32 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes) @@ -33,7 +53,7 @@ * @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes) * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor - * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F32 + * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) @@ -41,19 +61,19 @@ * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: F32 + * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p input_ptr * @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes) * @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor - * @param[in] var_ptr Pointer to the var tensor. 
Supported data types: F32 + * @param[in] var_ptr Pointer to the var tensor. Supported data types: same as @p input_ptr * @param[in] var_stride_x Stride of the var tensor in X dimension (in bytes) * @param[in] var_step_x var_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] var_offset_first_element_in_bytes The offset of the first element in the var source tensor - * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: F32 + * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: same as @p input_ptr * @param[in] beta_stride_x Stride of the beta source tensor in X dimension (in bytes) * @param[in] beta_step_x beta_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] beta_offset_first_element_in_bytes The offset of the first element in the beta source tensor - * @param[in] gamma_ptr Pointer to the gamma source tensor. Supported data types: F32 + * @param[in] gamma_ptr Pointer to the gamma source tensor. 
Supported data types: same as @p input_ptr * @param[in] gamma_stride_x Stride of the gamma source tensor in X dimension (in bytes) * @param[in] gamma_step_x gamma_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] gamma_offset_first_element_in_bytes The offset of the first element in the gamma source tensor @@ -74,26 +94,33 @@ __kernel void batchnormalization_layer(TENSOR3D_DECLARATION(input), Vector beta = CONVERT_TO_VECTOR_STRUCT(beta); Vector gamma = CONVERT_TO_VECTOR_STRUCT(gamma); - float4 _in = 0; - float4 denominator = 0; - float4 numerator = 0; - float4 x_bar = 0; - float4 gamma_vec = 0; - float4 beta_vec = 0; + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + _in = 0; + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + denominator = 0; + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + numerator = 0; + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + x_bar = 0; + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + gamma_vec = 0; + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + beta_vec = 0; const int current_slice = get_global_id(2); - _in = vload4(0, (__global float *)in.ptr); - denominator = *((__global float *)(var.ptr + current_slice * var.stride_x)); - denominator = rsqrt(denominator + epsilon); + _in = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr); + denominator = *((__global DATA_TYPE *)(var.ptr + current_slice * var.stride_x)); + denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(epsilon))); // Calculate x bar and store results - numerator = *((__global float *)(mean.ptr + current_slice * mean.stride_x)); - numerator = _in - numerator; - x_bar = numerator * denominator; + numerator = *((__global DATA_TYPE *)(mean.ptr + current_slice * mean.stride_x)); + numerator = SUB_OP(_in, numerator); + x_bar = MUL_OP(numerator, denominator); - gamma_vec = *((__global float *)(gamma.ptr + current_slice * beta.stride_x)); - beta_vec = *((__global float *)(beta.ptr + current_slice * beta.stride_x)); + gamma_vec = *((__global DATA_TYPE *)(gamma.ptr + current_slice * beta.stride_x)); + beta_vec = 
*((__global DATA_TYPE *)(beta.ptr + current_slice * beta.stride_x)); - vstore4(gamma_vec * x_bar + beta_vec, 0, (__global float *)out.ptr); + VSTORE(VEC_SIZE) + (ADD_OP(MUL_OP(gamma_vec, x_bar), beta_vec), 0, (__global DATA_TYPE *)out.ptr); } diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h index bb534f5a51..4de7fc576b 100644 --- a/src/core/CL/cl_kernels/fixed_point.h +++ b/src/core/CL/cl_kernels/fixed_point.h @@ -471,4 +471,16 @@ CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16) CONVERTQ_UP_IMPL(qs8x16, float16) CONVERTQ_UP_IMPL(qs16x16, float16) +#define SQCVT_SAT_IMPL(type) \ + inline type sqcvt_##type##_sat(float a, int fixed_point_position) \ + { \ + return CONVERT_SAT((a * (1 << fixed_point_position) + ((a < 0) ? -0.5f : 0.5f)), type); \ + } + +SQCVT_SAT_IMPL(qs8) +SQCVT_SAT_IMPL(qs16) + +#define SQCVT_SAT_OP_EXPAND_STR(a, type, position) sqcvt_##type##_sat((a), (position)) +#define SQCVT_SAT_OP_EXPAND(a, type, position) SQCVT_SAT_OP_EXPAND_STR((a), type, position) + #endif // ARM_COMPUTE_FIXED_POINT_H diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 85d8ab7cb4..02bf35a860 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -26,12 +26,15 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/FixedPoint.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "support/ToolchainSupport.h" + using namespace arm_compute; CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel() @@ -42,7 +45,7 @@ CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel() void 
CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); // Output tensor auto initialization if not yet initialized @@ -54,10 +57,6 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var, beta, gamma); ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0)); - // Set build options - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - _input = input; _output = output; _mean = mean; @@ -66,17 +65,25 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso _gamma = gamma; _epsilon = epsilon; + const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + + // Set build options + std::set<std::string> build_opts; + build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + if(is_data_type_fixed_point(input->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + } + // Create kernel - std::string kernel_name = "batchnormalization_layer"; - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts)); // Set kernel static arguments unsigned int idx = 2 * num_arguments_per_3D_tensor() + 4 * 
num_arguments_per_1D_tensor(); // Skip the input and output parameters _kernel.setArg<cl_float>(idx++, _epsilon); // Configure kernel window - const unsigned int num_elems_processed_per_iteration = 4; - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp new file mode 100644 index 0000000000..9b9df2e902 --- /dev/null +++ b/tests/validation/CL/BatchNormalizationLayer.cpp @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "dataset/BatchNormalizationLayerDataset.h" +#include "tests/validation/Helpers.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include <random> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::cl; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against floating point implementation's output */ +const float tolerance_qs8 = 3; /**< Tolerance value for comparing reference's output against quantized implementation's output */ +const float tolerance_qs16 = 6; /**< Tolerance value for comparing reference's output against quantized implementation's output */ + +/** Compute CL batch normalization function. + * + * @param[in] shape0 Shape of the input and output tensors. + * @param[in] shape1 Shape of the mean, var, beta and gamma tensors. + * @param[in] dt Data type of input and output tensors. + * @param[in] epsilon Small value added to the variance for numerical stability. + * @return Computed output tensor. 
+ */ +CLTensor compute_reference_batch_normalization_layer(const TensorShape &shape0, const TensorShape &shape1, DataType dt, float epsilon, int fixed_point_position = 0) +{ + // Create tensors + CLTensor src = create_tensor<CLTensor>(shape0, dt, 1, fixed_point_position); + CLTensor dst = create_tensor<CLTensor>(shape0, dt, 1, fixed_point_position); + CLTensor mean = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position); + CLTensor var = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position); + CLTensor beta = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position); + CLTensor gamma = create_tensor<CLTensor>(shape1, dt, 1, fixed_point_position); + + // Create and configure function + CLBatchNormalizationLayer norm; + norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + mean.allocator()->allocate(); + var.allocator()->allocate(); + beta.allocator()->allocate(); + gamma.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + BOOST_TEST(!mean.info()->is_resizable()); + BOOST_TEST(!var.info()->is_resizable()); + BOOST_TEST(!beta.info()->is_resizable()); + BOOST_TEST(!gamma.info()->is_resizable()); + + // Fill tensors + if(dt == DataType::F32) + { + float min_bound = 0.f; + float max_bound = 0.f; + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<float>(); + std::uniform_real_distribution<> distribution(min_bound, max_bound); + std::uniform_real_distribution<> distribution_var(0, max_bound); + library->fill(CLAccessor(src), distribution, 0); + library->fill(CLAccessor(mean), distribution, 1); + library->fill(CLAccessor(var), distribution_var, 0); + library->fill(CLAccessor(beta), distribution, 3); + library->fill(CLAccessor(gamma), distribution, 4); + } + else + { + int min_bound = 0; + int max_bound = 0; + if(dt == DataType::QS8) + { + std::tie(min_bound, max_bound) = 
get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position); + } + else + { + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position); + } + std::uniform_int_distribution<> distribution(min_bound, max_bound); + std::uniform_int_distribution<> distribution_var(0, max_bound); + library->fill(CLAccessor(src), distribution, 0); + library->fill(CLAccessor(mean), distribution, 1); + library->fill(CLAccessor(var), distribution_var, 0); + library->fill(CLAccessor(beta), distribution, 3); + library->fill(CLAccessor(gamma), distribution, 4); + } + + // Compute function + norm.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(CL) +BOOST_AUTO_TEST_SUITE(BatchNormalizationLayer) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16, DataType::F32 }), obj, dt) +{ + // Set fixed point position data type allowed + int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 
3 : 0; + + // Create tensors + CLTensor src = create_tensor<CLTensor>(obj.shape0, dt, 1, fixed_point_position); + CLTensor dst = create_tensor<CLTensor>(obj.shape0, dt, 1, fixed_point_position); + CLTensor mean = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position); + CLTensor var = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position); + CLTensor beta = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position); + CLTensor gamma = create_tensor<CLTensor>(obj.shape1, dt, 1, fixed_point_position); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + BOOST_TEST(mean.info()->is_resizable()); + BOOST_TEST(var.info()->is_resizable()); + BOOST_TEST(beta.info()->is_resizable()); + BOOST_TEST(gamma.info()->is_resizable()); + + // Create and configure function + CLBatchNormalizationLayer norm; + norm.configure(&src, &dst, &mean, &var, &beta, &gamma, obj.epsilon); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(obj.shape0); + const ValidRegion valid_region_vec = shape_to_valid_region(obj.shape1); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + validate(mean.info()->valid_region(), valid_region_vec); + validate(var.info()->valid_region(), valid_region_vec); + validate(beta.info()->valid_region(), valid_region_vec); + validate(gamma.info()->valid_region(), valid_region_vec); +} + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(Random, + RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::F32), + obj, dt) +{ + // Compute function + CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon); + + // Validate output + validate(CLAccessor(dst), 
ref_dst, tolerance_f, 0); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) + +BOOST_AUTO_TEST_SUITE(QS8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(Random, + RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 6), + obj, dt, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_qs8, 0); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(QS16) +BOOST_DATA_TEST_CASE(Random, + RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::QS16) * boost::unit_test::data::xrange(1, 14), + obj, dt, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_qs16, 0); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif /* DOXYGEN_SKIP_THIS */ diff --git a/tests/validation/FixedPoint.h b/tests/validation/FixedPoint.h index 261fcd6df6..ab6f14a49b 100644 --- a/tests/validation/FixedPoint.h +++ b/tests/validation/FixedPoint.h @@ -768,7 +768,7 @@ struct functions x2 = shift_right(mul(x2, three_minus_dx), 1); } - return (shift < 0) ? shift_left(x2, -shift >> 1) : shift_right(x2, shift >> 1); + return (shift < 0) ? 
shift_left(x2, (-shift) >> 1) : shift_right(x2, shift >> 1); } /** Calculate the hyperbolic tangent of a fixed point number * diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp index 38e3751db4..d825c889b6 100644 --- a/tests/validation/NEON/BatchNormalizationLayer.cpp +++ b/tests/validation/NEON/BatchNormalizationLayer.cpp @@ -100,7 +100,14 @@ Tensor compute_reference_batch_normalization_layer(const TensorShape &shape0, co { int min_bound = 0; int max_bound = 0; - std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position); + if(dt == DataType::QS8) + { + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position); + } + else + { + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position); + } std::uniform_int_distribution<> distribution(min_bound, max_bound); std::uniform_int_distribution<> distribution_var(0, max_bound); library->fill(NEAccessor(src), distribution, 0); diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp index 65705b17de..acf010e7b7 100644 --- a/tests/validation/Reference.cpp +++ b/tests/validation/Reference.cpp @@ -505,7 +505,14 @@ RawTensor Reference::compute_reference_batch_normalization_layer(const TensorSha { int min_bound = 0; int max_bound = 0; - std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position); + if(dt == DataType::QS8) + { + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position); + } + else + { + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int16_t>(fixed_point_position); + } std::uniform_int_distribution<> distribution(min_bound, max_bound); std::uniform_int_distribution<> distribution_var(0, max_bound); library->fill(ref_src, distribution, 0); diff --git 
a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h index 27c50cf6d2..9e6f5cf5d1 100644 --- a/tests/validation/TensorOperations.h +++ b/tests/validation/TensorOperations.h @@ -974,17 +974,17 @@ void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor for(int l = 0; l < cols; ++l) { const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth; - fixed_point_arithmetic::fixed_point<T> in_qs8(in[pos], fixed_point_position, true); - fixed_point_arithmetic::fixed_point<T> var_qs8(var[i], fixed_point_position, true); - fixed_point_arithmetic::fixed_point<T> mean_qs8(mean[i], fixed_point_position, true); - fixed_point_arithmetic::fixed_point<T> beta_qs8(beta[i], fixed_point_position, true); - fixed_point_arithmetic::fixed_point<T> gamma_qs8(gamma[i], fixed_point_position, true); - fixed_point_arithmetic::fixed_point<T> epsilon_qs8(epsilon, fixed_point_position); - - auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs8 + epsilon_qs8); - auto numerator = in_qs8 - mean_qs8; + fixed_point_arithmetic::fixed_point<T> in_qs(in[pos], fixed_point_position, true); + fixed_point_arithmetic::fixed_point<T> var_qs(var[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point<T> mean_qs(mean[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point<T> beta_qs(beta[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point<T> gamma_qs(gamma[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point<T> epsilon_qs(epsilon, fixed_point_position); + + auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs + epsilon_qs); + auto numerator = in_qs - mean_qs; auto x_bar = numerator * denominator; - x_bar = beta_qs8 + x_bar * gamma_qs8; + x_bar = beta_qs + x_bar * gamma_qs; out[pos] = x_bar.raw(); } } |