From ed4b8a07e67c7802207c8954a88ad7a91aec79e0 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 12 May 2021 12:28:58 +0100 Subject: Fix MeanStdDevNormalizationLayer reference outputting nan for FP16 - Bring the epsilon up to 1e-3 for FP16 (both backends) since it was causing the reference's variance being negative and its square root being NaN - Bring the epsilon up to 1e-7 for FP16 NEON test for the same problem on the NEON kernel - Adjust the CL kernel's vec_size when input tensor's width < 16 and use macros agnostic of vector size for sum reduction - Add previously mismatching tensor shapes Resolve COMPMID-4354 Change-Id: I823c871aacb72326f90c86b24cb16c3e2d4bd15e Signed-off-by: Giorgio Arena Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5630 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../CL/cl_kernels/mean_stddev_normalization.cl | 32 +++++++++------------- .../CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 29 ++++++-------------- tests/datasets/ShapeDatasets.h | 6 ++-- .../validation/CL/MeanStdDevNormalizationLayer.cpp | 4 +-- .../NEON/MeanStdDevNormalizationLayer.cpp | 4 +-- 5 files changed, 28 insertions(+), 47 deletions(-) diff --git a/src/core/CL/cl_kernels/mean_stddev_normalization.cl b/src/core/CL/cl_kernels/mean_stddev_normalization.cl index 4141d3e8b7..76be629934 100644 --- a/src/core/CL/cl_kernels/mean_stddev_normalization.cl +++ b/src/core/CL/cl_kernels/mean_stddev_normalization.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019, 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -76,31 +76,25 @@ __kernel void mean_stddev_normalization( sum_sq += data * data; } // Perform reduction -#if VEC_SIZE > 8 - sum.s01234567 += sum.s89abcdef; - sum_sq.s01234567 += sum_sq.s89abcdef; -#endif // VEC_SIZE > 8 -#if VEC_SIZE > 4 - sum.s0123 += sum.s4567; - sum_sq.s0123 += sum_sq.s4567; -#endif // VEC_SIZE > 4 -#if VEC_SIZE > 2 - sum.s01 += sum.s23; - sum_sq.s01 += sum_sq.s23; -#endif // VEC_SIZE > 2 - sum.s0 += sum.s1; - sum_sq.s0 += sum_sq.s1; + sum = SUM_REDUCE(sum, VEC_SIZE); + sum_sq = SUM_REDUCE(sum_sq, VEC_SIZE); + +#if VEC_SIZE > 1 +#define sum sum.s0 +#define sum_sq sum_sq.s0 +#endif // VEC_SIZE > 1 + // Left-overs loop for(; i < WIDTH; ++i) { DATA_TYPE data = *((__global DATA_TYPE *)offset(&in, i, 0)); - sum.s0 += data; - sum_sq.s0 += data * data; + sum += data; + sum_sq += data * data; } - DATA_TYPE mean = sum.s0 / WIDTH; - DATA_TYPE var = (sum_sq.s0 / WIDTH) - (mean * mean); + DATA_TYPE mean = sum / WIDTH; + DATA_TYPE var = (sum_sq / WIDTH) - (mean * mean); DATA_TYPE stddev_inv = 1.f / sqrt(var + EPSILON); i = 0; diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index 7dc34f17b5..9f98b67582 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -54,22 +54,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f } return Status{}; } - -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - if(output != nullptr) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, *input); - } - - const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); - - // This kernel doesn't need padding - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - return std::make_pair(Status{}, win); -} } // namespace CLMeanStdDevNormalizationKernel::CLMeanStdDevNormalizationKernel() @@ -90,10 +74,15 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate(input->info(), (output != nullptr) ? output->info() : nullptr, epsilon)); + if(output != nullptr) + { + auto_init_if_empty(*output->info(), *input->info()); + } + _input = input; _output = output; - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)); // Set build options CLBuildOptions build_opts; @@ -107,9 +96,8 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ _kernel = create_kernel(compile_context, "mean_stddev_normalization", build_opts.options()); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win); // Set config_id for enabling LWS tuning _config_id = "mean_stddev_normalization_layer_"; @@ -123,7 +111,6 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ Status CLMeanStdDevNormalizationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float epsilon) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, epsilon)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first); return Status{}; } diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index c2b4cd7da4..37c5f1626d 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -88,9 +88,9 @@ public: Small2DShapes() : ShapeDataset("Shape", { - TensorShape{ 7U, 7U }, - TensorShape{ 27U, 13U }, - TensorShape{ 128U, 64U } + TensorShape{ 1U, 7U }, + TensorShape{ 5U, 13U }, + TensorShape{ 32U, 64U } }) { } diff --git a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp index e77a21ed7f..cdeb622130 100644 --- a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -78,7 +78,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), - framework::dataset::make("Epsilon", { 1e-8 }))) + framework::dataset::make("Epsilon", { 1e-3 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); diff --git a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp index 20e3bd5325..90d3d05a0d 100644 --- a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp @@ -81,7 +81,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), - framework::dataset::make("Epsilon", { 1e-8 }))) + framework::dataset::make("Epsilon", { 1e-3 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -101,7 +101,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("InPlace", { false, true })), - framework::dataset::make("Epsilon", { 1e-8 }))) + framework::dataset::make("Epsilon", { 1e-7 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); -- cgit v1.2.1