From 552fe4c67d3cd2994cdbd5662cde79da5caf0c4d Mon Sep 17 00:00:00 2001 From: Murray Kornelsen Date: Fri, 22 Jul 2022 18:04:59 -0400 Subject: F16 Specialization for MeanStdDevNorm Ran into issues with f16 meanstddevnorm. Essentially, with large enough tensors and/or large values in tensors, output becomes all 0. This is due to the variance computation. In f16, it reaches infinity quite easily, then the division results in 0. This change modifies the OpenCL and NEON implementations to compute the sum of squares and the variance using f32, while other operations remain f16. Update: Found that the square operation also benefits from f32, rather than squaring in f16 and accumulating f32. Signed-off-by: Murray Kornelsen Change-Id: Ide00afd84ec6d26fec4d53b073e295814f08ba46 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7959 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Pablo Marquez Tello Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index da9e367590..b94593943c 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -91,6 +91,7 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DEPSILON=" + float_to_string_with_full_precision(epsilon)); build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); + build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DMEANSTDNORM_HALF"); build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); // Create kernel -- cgit v1.2.1