From 750641dd6aab1e5e62d1875b97b230312bb87959 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 8 May 2018 12:01:57 +0100 Subject: COMPMID-1052 - Rework validate method in CLGEMM Change-Id: Iece5bd6478b5fac5164abff30c1e63e8a77291a9 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130374 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp index d12255ff24..8f669a9298 100644 --- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp +++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp @@ -69,16 +69,16 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); window_changed = window_changed || update_window_and_padding(win, input_access); - // Configure window in case of configured output - if(output->total_size() != 0) - { - const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height); - const float scale_y = 1.0f / (scale_x); + // Output auto initialization if not yet initialized + auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_interleaved_shape(*input, mult_interleave4x4_height))); - AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y); - window_changed = window_changed || update_window_and_padding(win, output_access); - output_access.set_valid_region(win, input->valid_region()); - } + // Configure window + const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height); + const float scale_y = 1.0f / (scale_x); + + AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y); + window_changed = 
window_changed || update_window_and_padding(win, output_access); + output_access.set_valid_region(win, input->valid_region()); // Collapse along the Z direction // This collapse needs to be here in order to tune the Z dimension of LWS -- cgit v1.2.1