aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-07-12 12:42:35 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit 5b52fe3a4481769adcf42218a3747486cb4e9c14 (patch)
tree 947c84d7d257f2c47045245f9aea30b772cdfb0a /src/core/CL/kernels
parent 71d9b57aac146ae3ad5648c1308a872cea90070d (diff)
download ComputeLibrary-5b52fe3a4481769adcf42218a3747486cb4e9c14.tar.gz
COMPMID-1390: OCLGrind and benchmark tests fail for QASYMM8
COMPMID-1392: OCLGrind failures in im2col1x1_stridex1_dchw
COMPMID-1395: OCLGrind failures in output_stage_quantized
Change-Id: I35504bd1f701316df122be52d458c71bbd7e7909
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139722
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp 44
-rw-r--r-- src/core/CL/kernels/CLIm2ColKernel.cpp 2
2 files changed, 17 insertions, 29 deletions
diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
index 3d9d520841..4e2352cf6e 100644
--- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
@@ -90,44 +90,29 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
bool window_changed = false;
unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->data_type());
- // Update processed elements when input is S32 (comes from quantization input)
- if(input->data_type() == DataType::S32)
- {
- num_elems_processed_per_iteration = 16;
- }
-
// Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+
+ // Input window
AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+ window_changed = window_changed || update_window_and_padding(win, input_access);
+
+ // Bias window
+ if(bias != nullptr)
+ {
+ AccessWindowStatic bias_access(bias, 0, 0, ceil_to_multiple(bias->dimension(0), num_elems_processed_per_iteration), bias->dimension(1));
+ window_changed = window_changed || update_window_and_padding(win, bias_access);
+ }
+ // Output window
if(output != nullptr && (output->total_size() != 0))
{
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- if(bias == nullptr)
- {
- window_changed = update_window_and_padding(win, input_access, output_access);
- }
- else
- {
- AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1));
- window_changed = update_window_and_padding(win, input_access, output_access, bias_access);
- }
-
+ window_changed = window_changed || update_window_and_padding(win, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
}
else
{
- if(bias == nullptr)
- {
- window_changed = update_window_and_padding(win, input_access);
- }
- else
- {
- AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1));
- window_changed = update_window_and_padding(win, input_access, bias_access);
- }
-
input_access.set_valid_region(win, ValidRegion(Coordinates(), input->tensor_shape()));
}
@@ -165,10 +150,13 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons
_result_shift = result_shift;
_result_offset_after_shift = result_offset_after_shift;
+ const unsigned int num_elems_accessed_per_iteration = 16 / element_size_from_data_type(input->info()->data_type());
+
// Create kernel
CLBuildOptions build_opts;
build_opts.add_option_if(bias != nullptr, "-DHAS_BIAS");
build_opts.add_option("-D" + string_from_data_layout(input->info()->data_layout()));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_accessed_per_iteration));
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("output_stage_quantized", build_opts.options()));
// Set static kernel arguments
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 06ca005dd5..b1290b8edd 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -262,7 +262,7 @@ CLIm2ColKernel::configure_window(const ICLTensor *input, ICLTensor *output, cons
AccessWindowStatic input_access(input->info(),
-border.left,
-border.top,
- ceil_to_multiple(input_width + border.right, kernel_dims.width),
+ ceil_to_multiple(input_width + border.right, kernel_dims.width * _num_elems_processed_per_iteration),
input_height + border.bottom);
update_window_and_padding(win, input_access);
}