diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2018-05-08 12:01:57 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:51:50 +0000 |
commit | 750641dd6aab1e5e62d1875b97b230312bb87959 (patch) | |
tree | b3b180c07d7769cb32a6f35b6d0df2384a4638b0 /src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp | |
parent | aa3240d3e2a575c436ec60ea0a31e8375d997425 (diff) | |
download | ComputeLibrary-750641dd6aab1e5e62d1875b97b230312bb87959.tar.gz |
COMPMID-1052 - Rework validate method in CLGEMM
Change-Id: Iece5bd6478b5fac5164abff30c1e63e8a77291a9
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130374
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp | 18 |
1 files changed, 9 insertions, 9 deletions
diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
index d12255ff24..8f669a9298 100644
--- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
@@ -69,16 +69,16 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
     window_changed = window_changed || update_window_and_padding(win, input_access);
 
-    // Configure window in case of configured output
-    if(output->total_size() != 0)
-    {
-        const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height);
-        const float scale_y = 1.0f / (scale_x);
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_interleaved_shape(*input, mult_interleave4x4_height)));
 
-        AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y);
-        window_changed = window_changed || update_window_and_padding(win, output_access);
-        output_access.set_valid_region(win, input->valid_region());
-    }
+    // Configure window
+    const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height);
+    const float scale_y = 1.0f / (scale_x);
+
+    AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y);
+    window_changed = window_changed || update_window_and_padding(win, output_access);
+    output_access.set_valid_region(win, input->valid_region());
 
     // Collapse along the Z direction
     // This collapse needs to be here in order to tune the Z dimension of LWS