diff options
author | Ramy Elgammal <ramy.elgammal@arm.com> | 2022-12-16 13:39:33 +0000 |
---|---|---|
committer | Ramy Elgammal <ramy.elgammal@arm.com> | 2022-12-23 17:17:18 +0000 |
commit | 8468371b3e2ec42ee0b9b670d45d99eb1015574b (patch) | |
tree | a5fc48d97c6ff93757cd11897b24dbbeb0366cdf /src/core/CL | |
parent | 04f4620cf999846a44089c81720aa920edec6993 (diff) | |
download | ComputeLibrary-8468371b3e2ec42ee0b9b670d45d99eb1015574b.tar.gz |
Make CLReshape kernel window based on dst instead of src
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Partially-Resolves: COMPMID-5522
Change-Id: I1d90003079c3f24d081cc49f7b110eda753f6995
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8838
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL')
-rw-r--r-- | src/core/CL/cl_kernels/common/reshape_layer.cl | 26 |
1 files changed, 13 insertions, 13 deletions
```diff
diff --git a/src/core/CL/cl_kernels/common/reshape_layer.cl b/src/core/CL/cl_kernels/common/reshape_layer.cl
index bfdefc863e..c47664bf85 100644
--- a/src/core/CL/cl_kernels/common/reshape_layer.cl
+++ b/src/core/CL/cl_kernels/common/reshape_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,20 +51,20 @@ __kernel void reshape_layer(TENSOR3D_DECLARATION(input),
                             int2 input_shape,
                             int2 output_shape)
 {
-    Tensor3D in  = CONVERT_TO_TENSOR3D_STRUCT(input);
-    Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output);
+    int out_x = get_global_id(0);
+    int out_y = get_global_id(1);
+    int out_z = get_global_id(2);
 
-    int3 id = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
+    // Compute the output linearized index
+    int out_linear_idx = out_x + out_y * output_shape.x + out_z * output_shape.x * output_shape.y;
 
-    // Linearize index
-    int linear_idx = id.x + id.y * input_shape.x + id.z * input_shape.x * input_shape.y;
-
-    // Translate to output
-    int3 out_id;
-    out_id.x = linear_idx % output_shape.x;
-    out_id.y = (linear_idx / output_shape.x) % output_shape.y;
-    out_id.z = linear_idx / (output_shape.x * output_shape.y);
+    // Translate to intput
+    int in_x = out_linear_idx % input_shape.x;
+    int in_y = (out_linear_idx / input_shape.x) % input_shape.y;
+    int in_z = out_linear_idx / (input_shape.x * input_shape.y);
 
     // Store result
-    *((__global DATA_TYPE *)tensor3D_offset(&out, out_id.x, out_id.y, out_id.z)) = *((__global DATA_TYPE *)in.ptr);
+    input_ptr += input_offset_first_element_in_bytes + in_x * input_stride_x + in_y * input_stride_y + in_z * input_stride_z;
+    output_ptr += output_offset_first_element_in_bytes + out_x * output_stride_x + out_y * output_stride_y + out_z * output_stride_z;
+    *((__global DATA_TYPE *)output_ptr) = *((__global DATA_TYPE *)input_ptr);
 }
```