diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2021-05-12 12:28:58 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-05-17 09:02:15 +0000 |
commit | ed4b8a07e67c7802207c8954a88ad7a91aec79e0 (patch) | |
tree | 771cb0867fa675cf02286006f7fafa2f66a814e2 /src/core/CL/kernels | |
parent | 186fe683da63dea2dac06e46a412e354d33cd9c2 (diff) | |
download | ComputeLibrary-ed4b8a07e67c7802207c8954a88ad7a91aec79e0.tar.gz |
Fix MeanStdDevNormalizationLayer reference outputting nan for FP16
- Bring the epsilon up to 1e-3 for FP16 (both backends), since it was causing the reference's variance to be negative and its square root to be NaN
- Bring the epsilon up to 1e-7 for the FP16 NEON test, to address the same problem in the NEON kernel
- Adjust the CL kernel's vec_size when input tensor's width < 16 and use macros agnostic of vector size for sum reduction
- Add previously mismatching tensor shapes
Resolve COMPMID-4354
Change-Id: I823c871aacb72326f90c86b24cb16c3e2d4bd15e
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5630
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- | src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 29 |
1 files changed, 8 insertions, 21 deletions
diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index 7dc34f17b5..9f98b67582 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -54,22 +54,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f } return Status{}; } - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - if(output != nullptr) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, *input); - } - - const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); - - // This kernel doesn't need padding - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - return std::make_pair(Status{}, win); -} } // namespace CLMeanStdDevNormalizationKernel::CLMeanStdDevNormalizationKernel() @@ -90,10 +74,15 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate(input->info(), (output != nullptr) ? output->info() : nullptr, epsilon)); + if(output != nullptr) + { + auto_init_if_empty(*output->info(), *input->info()); + } + _input = input; _output = output; - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)); // Set build options CLBuildOptions build_opts; @@ -107,9 +96,8 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ _kernel = create_kernel(compile_context, "mean_stddev_normalization", build_opts.options()); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? 
nullptr : output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win); // Set config_id for enabling LWS tuning _config_id = "mean_stddev_normalization_layer_"; @@ -123,7 +111,6 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ Status CLMeanStdDevNormalizationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float epsilon) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, epsilon)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first); return Status{}; } |