aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ramy Elgammal <ramy.elgammal@arm.com> 2022-12-16 13:39:33 +0000
committer Ramy Elgammal <ramy.elgammal@arm.com> 2022-12-23 17:17:18 +0000
commit 8468371b3e2ec42ee0b9b670d45d99eb1015574b (patch)
tree a5fc48d97c6ff93757cd11897b24dbbeb0366cdf
parent 04f4620cf999846a44089c81720aa920edec6993 (diff)
download ComputeLibrary-8468371b3e2ec42ee0b9b670d45d99eb1015574b.tar.gz
Make CLReshape kernel window based on dst instead of src
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Partially-Resolves: COMPMID-5522
Change-Id: I1d90003079c3f24d081cc49f7b110eda753f6995
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8838
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/common/reshape_layer.cl 26
-rw-r--r-- src/gpu/cl/kernels/ClReshapeKernel.cpp 4
2 files changed, 15 insertions, 15 deletions
diff --git a/src/core/CL/cl_kernels/common/reshape_layer.cl b/src/core/CL/cl_kernels/common/reshape_layer.cl
index bfdefc863e..c47664bf85 100644
--- a/src/core/CL/cl_kernels/common/reshape_layer.cl
+++ b/src/core/CL/cl_kernels/common/reshape_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,20 +51,20 @@ __kernel void reshape_layer(TENSOR3D_DECLARATION(input),
int2 input_shape,
int2 output_shape)
{
- Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output);
+ int out_x = get_global_id(0);
+ int out_y = get_global_id(1);
+ int out_z = get_global_id(2);
- int3 id = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
+ // Compute the output linearized index
+ int out_linear_idx = out_x + out_y * output_shape.x + out_z * output_shape.x * output_shape.y;
- // Linearize index
- int linear_idx = id.x + id.y * input_shape.x + id.z * input_shape.x * input_shape.y;
-
- // Translate to output
- int3 out_id;
- out_id.x = linear_idx % output_shape.x;
- out_id.y = (linear_idx / output_shape.x) % output_shape.y;
- out_id.z = linear_idx / (output_shape.x * output_shape.y);
+ // Translate to input
+ int in_x = out_linear_idx % input_shape.x;
+ int in_y = (out_linear_idx / input_shape.x) % input_shape.y;
+ int in_z = out_linear_idx / (input_shape.x * input_shape.y);
// Store result
- *((__global DATA_TYPE *)tensor3D_offset(&out, out_id.x, out_id.y, out_id.z)) = *((__global DATA_TYPE *)in.ptr);
+ input_ptr += input_offset_first_element_in_bytes + in_x * input_stride_x + in_y * input_stride_y + in_z * input_stride_z;
+ output_ptr += output_offset_first_element_in_bytes + out_x * output_stride_x + out_y * output_stride_y + out_z * output_stride_z;
+ *((__global DATA_TYPE *)output_ptr) = *((__global DATA_TYPE *)input_ptr);
}
diff --git a/src/gpu/cl/kernels/ClReshapeKernel.cpp b/src/gpu/cl/kernels/ClReshapeKernel.cpp
index 246bd9c838..121bb33edf 100644
--- a/src/gpu/cl/kernels/ClReshapeKernel.cpp
+++ b/src/gpu/cl/kernels/ClReshapeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -98,7 +98,7 @@ void ClReshapeKernel::configure(const CLCompileContext &compile_context, const I
_kernel.setArg<cl_int2>(idx++, dst_shape);
// Configure kernel window
- Window win = calculate_max_window(*src);
+ Window win = calculate_max_window(*dst);
ICLKernel::configure_internal(win);
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));