From 671d4f01d96b62a24cf0688059118a1e7908650e Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 14 Oct 2020 12:26:51 +0100 Subject: COMPMID-3724: Remove OpenCL padding: CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel COMPMID-3725: Remove OpenCL padding: CLGEMMLowpQuantizeDownInt32ScaleKernel Change-Id: Idea5974a56861efae3bc255f1224c7f1e88f3650 Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4182 Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins --- .../CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp | 55 +++++++--------------- 1 file changed, 16 insertions(+), 39 deletions(-) (limited to 'src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp index 55e4ed2bd9..79888cdba2 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" @@ -62,41 +61,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con return Status{}; } - -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output, DataType output_data_type) -{ - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, input->clone()->set_data_type(output_data_type)); - - constexpr unsigned int num_elems_processed_per_iteration = 4; - - // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, - input_access); - - AccessWindowHorizontal output_result_access(output, 0, num_elems_processed_per_iteration); - window_changed = window_changed || update_window_and_padding(win, output_result_access); - output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); - - if(bias != nullptr) - { - AccessWindowStatic bias_access(bias, 0, 0, ceil_to_multiple(bias->dimension(0), num_elems_processed_per_iteration), bias->tensor_shape()[1]); - window_changed = window_changed || update_window_and_padding(win, bias_access); - } - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} } //namespace CLGEMMLowpQuantizeDownInt32ScaleKernel::CLGEMMLowpQuantizeDownInt32ScaleKernel() - : _input(nullptr), _bias(nullptr), _output(nullptr), _output_stage(nullptr) + : _input(nullptr), _bias(nullptr), _output(nullptr) { } + Status CLGEMMLowpQuantizeDownInt32ScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -110,7 +81,8 @@ void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const ICLTensor *input, c configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, output_stage); } -void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage) +void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, + const GEMMLowpOutputStageInfo *output_stage) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -120,15 +92,21 @@ void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &c output->info(), output_stage)); - _input = input; - _bias = bias; - _output = output; - _output_stage = output_stage; + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(output_stage->output_data_type)); + + _input = input; + _bias = bias; + _output = output; + + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->info()->dimension(0)); // Set the arguments to pass at compile time auto min = output_stage->gemmlowp_min_bound; auto max = output_stage->gemmlowp_max_bound; CLBuildOptions build_opts; + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration)); build_opts.add_option("-DRESULT_OFFSET=" + support::cpp11::to_string(output_stage->gemmlowp_offset)); build_opts.add_option("-DRESULT_MULT_INT=" + support::cpp11::to_string(output_stage->gemmlowp_multiplier)); build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(output_stage->gemmlowp_shift)); @@ -143,9 +121,8 @@ void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &c _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down", build_opts.options()); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), output_stage->output_data_type); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win); } void CLGEMMLowpQuantizeDownInt32ScaleKernel::run(const Window &window, cl::CommandQueue &queue) -- cgit v1.2.1