diff options
Diffstat (limited to 'src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 55 |
1 files changed, 23 insertions, 32 deletions
diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index a889df7930..8632bdf623 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,9 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/utils/helpers/AdjustVecSize.h" +#include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -47,39 +50,19 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); // Checks performed when output is configured - if((output != nullptr) && (output->total_size() != 0)) + if ((output != nullptr) && (output->total_size() != 0)) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } return Status{}; } - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - if(output != nullptr) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, *input); - } - - const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); - - // This kernel doesn't need padding - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - if(output != nullptr) - { - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - } - - return std::make_pair(Status{}, win); -} } // namespace CLMeanStdDevNormalizationKernel::CLMeanStdDevNormalizationKernel() : _input(nullptr), _output(nullptr), _run_in_place(false) { + _type = CLKernelType::ELEMENTWISE; } void CLMeanStdDevNormalizationKernel::configure(ICLTensor *input, ICLTensor *output, float epsilon) @@ -87,18 +70,28 @@ void CLMeanStdDevNormalizationKernel::configure(ICLTensor *input, ICLTensor *out configure(CLKernelLibrary::get().get_compile_context(), input, output, epsilon); } -void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float epsilon) +void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + float epsilon) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); _run_in_place = (output == nullptr) || (output == input); - ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate(input->info(), (output != nullptr) ? output->info() : nullptr, epsilon)); + ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate( + input->info(), (output != nullptr) ? output->info() : nullptr, epsilon)); + + if (output != nullptr) + { + auto_init_if_empty(*output->info(), *input->info()); + } _input = input; _output = output; - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + const unsigned int num_elems_processed_per_iteration = + adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)); // Set build options CLBuildOptions build_opts; @@ -106,15 +99,15 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DEPSILON=" + float_to_string_with_full_precision(epsilon)); build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); + build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DMEANSTDNORM_HALF"); build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); // Create kernel _kernel = create_kernel(compile_context, "mean_stddev_normalization", build_opts.options()); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win); // Set config_id for enabling LWS tuning _config_id = "mean_stddev_normalization_layer_"; @@ -128,7 +121,6 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ Status CLMeanStdDevNormalizationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float epsilon) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, epsilon)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first); return Status{}; } @@ -148,7 +140,6 @@ void CLMeanStdDevNormalizationKernel::run(const Window &window, cl::CommandQueue add_2D_tensor_argument_if((!_run_in_place), idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); + } while (window.slide_window_slice_2D(slice)); } } // namespace arm_compute |