about summary refs log tree commit diff
path: root/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp 18
1 files changed, 9 insertions, 9 deletions
diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
index d12255ff24..8f669a9298 100644
--- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
+++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp
@@ -69,16 +69,16 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
window_changed = window_changed || update_window_and_padding(win, input_access);
- // Configure window in case of configured output
- if(output->total_size() != 0)
- {
- const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height);
- const float scale_y = 1.0f / (scale_x);
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_interleaved_shape(*input, mult_interleave4x4_height)));
- AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y);
- window_changed = window_changed || update_window_and_padding(win, output_access);
- output_access.set_valid_region(win, input->valid_region());
- }
+ // Configure window
+ const float scale_x = 4.0f * static_cast<float>(mult_interleave4x4_height);
+ const float scale_y = 1.0f / (scale_x);
+
+ AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration, 1, scale_x, scale_y);
+ window_changed = window_changed || update_window_and_padding(win, output_access);
+ output_access.set_valid_region(win, input->valid_region());
// Collapse along the Z direction
// This collapse needs to be here in order to tune the Z dimension of LWS