From 8468371b3e2ec42ee0b9b670d45d99eb1015574b Mon Sep 17 00:00:00 2001 From: Ramy Elgammal Date: Fri, 16 Dec 2022 13:39:33 +0000 Subject: Make CLReshape kernel window based on dst instead of src Signed-off-by: Ramy Elgammal Partially-Resolves: COMPMID-5522 Change-Id: I1d90003079c3f24d081cc49f7b110eda753f6995 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8838 Reviewed-by: Gian Marco Iodice Benchmark: Arm Jenkins Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/common/reshape_layer.cl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'src/core/CL/cl_kernels/common/reshape_layer.cl') diff --git a/src/core/CL/cl_kernels/common/reshape_layer.cl b/src/core/CL/cl_kernels/common/reshape_layer.cl index bfdefc863e..c47664bf85 100644 --- a/src/core/CL/cl_kernels/common/reshape_layer.cl +++ b/src/core/CL/cl_kernels/common/reshape_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -51,20 +51,20 @@ __kernel void reshape_layer(TENSOR3D_DECLARATION(input), int2 input_shape, int2 output_shape) { - Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output); + int out_x = get_global_id(0); + int out_y = get_global_id(1); + int out_z = get_global_id(2); - int3 id = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); + // Compute the output linearized index + int out_linear_idx = out_x + out_y * output_shape.x + out_z * output_shape.x * output_shape.y; - // Linearize index - int linear_idx = id.x + id.y * input_shape.x + id.z * input_shape.x * input_shape.y; - - // Translate to output - int3 out_id; - out_id.x = linear_idx % output_shape.x; - out_id.y = (linear_idx / output_shape.x) % output_shape.y; - out_id.z = linear_idx / (output_shape.x * output_shape.y); + // Translate to input + int in_x = out_linear_idx % input_shape.x; + int in_y = (out_linear_idx / input_shape.x) % input_shape.y; + int in_z = out_linear_idx / (input_shape.x * input_shape.y); // Store result - *((__global DATA_TYPE *)tensor3D_offset(&out, out_id.x, out_id.y, out_id.z)) = *((__global DATA_TYPE *)in.ptr); + input_ptr += input_offset_first_element_in_bytes + in_x * input_stride_x + in_y * input_stride_y + in_z * input_stride_z; + output_ptr += output_offset_first_element_in_bytes + out_x * output_stride_x + out_y * output_stride_y + out_z * output_stride_z; + *((__global DATA_TYPE *)output_ptr) = *((__global DATA_TYPE *)input_ptr); } -- cgit v1.2.1