COMPMID-3724: Remove OpenCL padding: CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel

COMPMID-3725: Remove OpenCL padding: CLGEMMLowpQuantizeDownInt32ScaleKernel Change-Id: Idea5974a56861efae3bc255f1224c7f1e88f3650 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4182 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2020-10-14 12:26:51 +0100
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2020-10-16 17:19:40 +0000
commit: 671d4f01d96b62a24cf0688059118a1e7908650e (patch)
tree: 33ee626be7de34f0c7fb91da9cb136004c361cb7 /src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
parent: 3b9a564fd4573d7cf09e3203eb8a9a30fd5969c9 (diff)
download: ComputeLibrary-671d4f01d96b62a24cf0688059118a1e7908650e.tar.gz
1 files changed, 16 insertions, 39 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
index 55e4ed2bd9..79888cdba2 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
@@ -23,7 +23,6 @@
  */
 #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
 
-#include "arm_compute/core/AccessWindowStatic.h"
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Error.h"
@@ -62,41 +61,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
 
     return Status{};
 }
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output, DataType output_data_type)
-{
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output, input->clone()->set_data_type(output_data_type));
-
-    constexpr unsigned int num_elems_processed_per_iteration = 4;
-
-    // Configure kernel window
-    Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
-
-    bool window_changed = update_window_and_padding(win,
-                                                    input_access);
-
-    AccessWindowHorizontal output_result_access(output, 0, num_elems_processed_per_iteration);
-    window_changed = window_changed || update_window_and_padding(win, output_result_access);
-    output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
-
-    if(bias != nullptr)
-    {
-        AccessWindowStatic bias_access(bias, 0, 0, ceil_to_multiple(bias->dimension(0), num_elems_processed_per_iteration), bias->tensor_shape()[1]);
-        window_changed = window_changed || update_window_and_padding(win, bias_access);
-    }
-
-    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, win);
-}
 } //namespace
 
 CLGEMMLowpQuantizeDownInt32ScaleKernel::CLGEMMLowpQuantizeDownInt32ScaleKernel()
-    : _input(nullptr), _bias(nullptr), _output(nullptr), _output_stage(nullptr)
+    : _input(nullptr), _bias(nullptr), _output(nullptr)
 {
 }
+
 Status CLGEMMLowpQuantizeDownInt32ScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -110,7 +81,8 @@ void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const ICLTensor *input, c
     configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, output_stage);
 }
 
-void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage)
+void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+                                                       const GEMMLowpOutputStageInfo *output_stage)
 {
     // Perform validate step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -120,15 +92,21 @@ void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &c
                                                   output->info(),
                                                   output_stage));
 
-    _input        = input;
-    _bias         = bias;
-    _output       = output;
-    _output_stage = output_stage;
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(output_stage->output_data_type));
+
+    _input  = input;
+    _bias   = bias;
+    _output = output;
+
+    const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->info()->dimension(0));
 
     // Set the arguments to pass at compile time
     auto           min = output_stage->gemmlowp_min_bound;
     auto           max = output_stage->gemmlowp_max_bound;
     CLBuildOptions build_opts;
+    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+    build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration));
     build_opts.add_option("-DRESULT_OFFSET=" + support::cpp11::to_string(output_stage->gemmlowp_offset));
     build_opts.add_option("-DRESULT_MULT_INT=" + support::cpp11::to_string(output_stage->gemmlowp_multiplier));
     build_opts.add_option("-DRESULT_SHIFT=" + support::cpp11::to_string(output_stage->gemmlowp_shift));
@@ -143,9 +121,8 @@ void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &c
     _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down", build_opts.options());
 
     // Configure kernel window
-    auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), output_stage->output_data_type);
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    ICLKernel::configure_internal(win_config.second);
+    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+    ICLKernel::configure_internal(win);
 }
 
 void CLGEMMLowpQuantizeDownInt32ScaleKernel::run(const Window &window, cl::CommandQueue &queue)
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2020-10-14 12:26:51 +0100
committer	Michele Di Giorgio <michele.digiorgio@arm.com>	2020-10-16 17:19:40 +0000
commit	671d4f01d96b62a24cf0688059118a1e7908650e (patch)
tree	33ee626be7de34f0c7fb91da9cb136004c361cb7 /src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp
parent	3b9a564fd4573d7cf09e3203eb8a9a30fd5969c9 (diff)
download	ComputeLibrary-671d4f01d96b62a24cf0688059118a1e7908650e.tar.gz