From 5b52fe3a4481769adcf42218a3747486cb4e9c14 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 12 Jul 2018 12:42:35 +0100 Subject: COMPMID-1390: OCLGrind and benchmark tests fail for QASYMM8 COMPMID-1392: OCLGrind failures in im2col1x1_stridex1_dchw COMPMID-1395: OCLGrind failures in output_stage_quantized Change-Id: I35504bd1f701316df122be52d458c71bbd7e7909 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139722 Tested-by: Jenkins Reviewed-by: Giorgio Arena Reviewed-by: Anthony Barbier --- .../CLDirectConvolutionOutputStageKernel.cpp | 44 ++++++++-------------- src/core/CL/kernels/CLIm2ColKernel.cpp | 2 +- 2 files changed, 17 insertions(+), 29 deletions(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp index 3d9d520841..4e2352cf6e 100644 --- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp @@ -90,44 +90,29 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen bool window_changed = false; unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->data_type()); - // Update processed elements when input is S32 (comes from quantization input) - if(input->data_type() == DataType::S32) - { - num_elems_processed_per_iteration = 16; - } - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + + // Input window AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + window_changed = window_changed || update_window_and_padding(win, input_access); + + // Bias window + if(bias != nullptr) + { + AccessWindowStatic bias_access(bias, 0, 0, ceil_to_multiple(bias->dimension(0), num_elems_processed_per_iteration), bias->dimension(1)); + window_changed = window_changed || update_window_and_padding(win, bias_access); + } + // Output window if(output != nullptr && (output->total_size() != 0)) { AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - if(bias == nullptr) - { - window_changed = update_window_and_padding(win, input_access, output_access); - } - else - { - AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1)); - window_changed = update_window_and_padding(win, input_access, output_access, bias_access); - } - + window_changed = window_changed || update_window_and_padding(win, output_access); output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); } else { - if(bias == nullptr) - { - window_changed = update_window_and_padding(win, input_access); - } - else - { - AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1)); - window_changed = update_window_and_padding(win, input_access, bias_access); - } - input_access.set_valid_region(win, ValidRegion(Coordinates(), input->tensor_shape())); } @@ -165,10 +150,13 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons _result_shift = result_shift; _result_offset_after_shift = result_offset_after_shift; + const unsigned int num_elems_accessed_per_iteration = 16 / element_size_from_data_type(input->info()->data_type()); + // Create kernel CLBuildOptions build_opts; build_opts.add_option_if(bias != nullptr, "-DHAS_BIAS"); build_opts.add_option("-D" + string_from_data_layout(input->info()->data_layout())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_accessed_per_iteration)); _kernel = static_cast(CLKernelLibrary::get().create_kernel("output_stage_quantized", build_opts.options())); // Set static kernel arguments diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 06ca005dd5..b1290b8edd 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -262,7 +262,7 @@ CLIm2ColKernel::configure_window(const ICLTensor *input, ICLTensor *output, cons AccessWindowStatic input_access(input->info(), -border.left, -border.top, - ceil_to_multiple(input_width + border.right, kernel_dims.width), + ceil_to_multiple(input_width + border.right, kernel_dims.width * _num_elems_processed_per_iteration), input_height + border.bottom); update_window_and_padding(win, input_access); } -- cgit v1.2.1