aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-05-08 12:01:57 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:51:50 +0000
commit 750641dd6aab1e5e62d1875b97b230312bb87959 (patch)
tree b3b180c07d7769cb32a6f35b6d0df2384a4638b0 /src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
parent aa3240d3e2a575c436ec60ea0a31e8375d997425 (diff)
download ComputeLibrary-750641dd6aab1e5e62d1875b97b230312bb87959.tar.gz
COMPMID-1052 - Rework validate method in CLGEMM
Change-Id: Iece5bd6478b5fac5164abff30c1e63e8a77291a9 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130374 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp 18
1 files changed, 9 insertions, 9 deletions
diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
index d12255ff24..8f669a9298 100644
--- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
@@ -69,16 +69,16 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
window_changed = window_changed || update_window_and_padding(win, input_access);
- // Configure window in case of configured output
- if(output->total_size() != 0)
- {
- const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height);
- const float scale_y = 1.0f / (scale_x);
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_interleaved_shape(*input, mult_interleave4x4_height)));
- AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y);
- window_changed = window_changed || update_window_and_padding(win, output_access);
- output_access.set_valid_region(win, input->valid_region());
- }
+ // Configure window
+ const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height);
+ const float scale_y = 1.0f / (scale_x);
+
+ AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y);
+ window_changed = window_changed || update_window_and_padding(win, output_access);
+ output_access.set_valid_region(win, input->valid_region());
// Collapse along the Z direction
// This collapse needs to be here in order to tune the Z dimension of LWS