aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorViet-Hoa Do <viet-hoa.do@arm.com>2024-01-16 16:23:24 +0000
committerViet-Hoa Do <viet-hoa.do@arm.com>2024-01-18 11:32:09 +0000
commit6829e0201e886cfd311e39f1d88f7452894bdfe5 (patch)
treeac1b97df4b76d0d09652ef3dbdce2be3d222df3c
parent8896cf7cb7df34c699e7453a0f0c683d1202ed15 (diff)
downloadComputeLibrary-6829e0201e886cfd311e39f1d88f7452894bdfe5.tar.gz
Fix divide-by-zero compilation error
* CONVERT_TO_TENSOR4D_STRUCT_NO_STEP is implemented and used in some CL kernels in the way that causes divide-by-zero issue. - Since the steps are all zeros, the issue might have been ignored by the compiler. Resolves: COMPMID-6795 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I0fb38fc62d63671b8abefa39b3d9b3ca6f49c7fe Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10967 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/common/col2im.cl4
-rw-r--r--src/core/CL/cl_kernels/common/gather.cl9
-rw-r--r--src/core/CL/cl_kernels/common/instance_normalization.cl8
-rw-r--r--src/core/CL/cl_kernels/common/permute.cl4
-rw-r--r--src/core/CL/cl_kernels/common/reverse.cl4
-rw-r--r--src/core/CL/cl_kernels/common/slice_ops.cl6
-rw-r--r--src/core/CL/cl_kernels/common/tile.cl6
-rw-r--r--src/core/CL/cl_kernels/helpers.h28
-rw-r--r--src/core/CL/cl_kernels/nchw/batch_to_space.cl6
-rw-r--r--src/core/CL/cl_kernels/nchw/depth_to_space.cl6
-rw-r--r--src/core/CL/cl_kernels/nchw/space_to_batch.cl6
-rw-r--r--src/core/CL/cl_kernels/nchw/space_to_depth.cl6
-rw-r--r--src/core/CL/cl_kernels/nhwc/batch_to_space.cl8
-rw-r--r--src/core/CL/cl_kernels/nhwc/depth_to_space.cl6
-rw-r--r--src/core/CL/cl_kernels/nhwc/space_to_batch.cl8
-rw-r--r--src/core/CL/cl_kernels/nhwc/space_to_depth.cl6
-rw-r--r--src/core/CL/kernels/CLGatherKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLStridedSliceKernel.cpp4
18 files changed, 69 insertions, 60 deletions
diff --git a/src/core/CL/cl_kernels/common/col2im.cl b/src/core/CL/cl_kernels/common/col2im.cl
index 89054dcb31..4dc005fd43 100644
--- a/src/core/CL/cl_kernels/common/col2im.cl
+++ b/src/core/CL/cl_kernels/common/col2im.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,7 +67,7 @@ __kernel void col2im(
TENSOR4D_DECLARATION(dst))
{
Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
- Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(dst, 0);
+ Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(dst);
const uint xd = get_global_id(1) % WIDTH_OUTPUT; // x coordinate of the destination tensor
const uint yd = get_global_id(1) / WIDTH_OUTPUT; // y coordinate of the destination tensor
diff --git a/src/core/CL/cl_kernels/common/gather.cl b/src/core/CL/cl_kernels/common/gather.cl
index 5d180f3781..e16f4bf315 100644
--- a/src/core/CL/cl_kernels/common/gather.cl
+++ b/src/core/CL/cl_kernels/common/gather.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021, 2023 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,6 @@
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1
* @attention Output tensor depth should be given as a preprocessor argument using -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16
- * @attention Input tensor depth should be given as a preprocessor argument using -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16
*
*
* @param[in] input_ptr Pointer to the source tensor. Supported data types: All
@@ -67,8 +66,8 @@ __kernel void gather(
const int pz = get_global_id(2) % OUTPUT_DIM_Z;
const int pw = (get_global_id(2) / OUTPUT_DIM_Z );
- const Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, INPUT_DIM_Z);
- const Tensor4D indices = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(indices, INDICES_DIM_Z);
+ const Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
+ const Tensor4D indices = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(indices);
Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, OUTPUT_DIM_Z);
#if AXIS == 0
@@ -128,4 +127,4 @@ __kernel void gather(
*(__global DATA_TYPE *)output.ptr = select((DATA_TYPE)0, *((__global const DATA_TYPE *)input_addr), (DATA_TYPE)(index < INDEX_LIMIT));
}
-#endif //defined(DATA_TYPE) && defined(AXIS) \ No newline at end of file
+#endif //defined(DATA_TYPE) && defined(AXIS)
diff --git a/src/core/CL/cl_kernels/common/instance_normalization.cl b/src/core/CL/cl_kernels/common/instance_normalization.cl
index adfbebd67d..f9b3cd3620 100644
--- a/src/core/CL/cl_kernels/common/instance_normalization.cl
+++ b/src/core/CL/cl_kernels/common/instance_normalization.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,7 +53,7 @@ __kernel void compute_mean_var(
TENSOR4D_DECLARATION(input),
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output);
#if defined(NHWC)
@@ -176,10 +176,10 @@ __kernel void instance_normalization(
#endif /* IN_PLACE */
)
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D mean_var = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(mean_var);
#ifndef IN_PLACE
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
+ Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output);
#endif /* IN_PLACE */
#if defined(NHWC)
diff --git a/src/core/CL/cl_kernels/common/permute.cl b/src/core/CL/cl_kernels/common/permute.cl
index a03eeb1a19..1a97ca7495 100644
--- a/src/core/CL/cl_kernels/common/permute.cl
+++ b/src/core/CL/cl_kernels/common/permute.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,7 +54,7 @@ __kernel void permute(TENSOR4D_DECLARATION(input),
{
Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
+ Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output);
int out_index[4] = { 0 };
int in_index[4] = { 0 };
diff --git a/src/core/CL/cl_kernels/common/reverse.cl b/src/core/CL/cl_kernels/common/reverse.cl
index f94bfb6640..e6df3041c2 100644
--- a/src/core/CL/cl_kernels/common/reverse.cl
+++ b/src/core/CL/cl_kernels/common/reverse.cl
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2018-2021, 2023 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -71,7 +71,7 @@ __kernel void reverse(TENSOR4D_DECLARATION(src),
{
Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(src, depth);
Vector axis = CONVERT_TO_VECTOR_STRUCT_NO_STEP(axis);
- Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(dst, depth);
+ Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(dst);
const uint x_in = get_global_id(0);
const uint y_in = get_global_id(1);
diff --git a/src/core/CL/cl_kernels/common/slice_ops.cl b/src/core/CL/cl_kernels/common/slice_ops.cl
index d12c60f5ea..189d414aba 100644
--- a/src/core/CL/cl_kernels/common/slice_ops.cl
+++ b/src/core/CL/cl_kernels/common/slice_ops.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,7 +28,7 @@
* @attention Supported tensor rank: up to 4
*
* @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input and output tensor dephts should be given as a preprocessor arguments using -DSRC_DEPTH=size. and -DDST_DEPTH=size
+ * @attention Output tensor depht should be given as a preprocessor arguments using -DDST_DEPTH=size
* @attention Absolute start coordinates for each dimension should be given as preprocessor -DSTART_index=value e.g. -DSTART_0=2
* @attention Strides for each dimension should be given as preprocessor -DSTRIDE_index=value e.g. -DSTRIDE_1=1
*
@@ -58,7 +58,7 @@ __kernel void strided_slice(
TENSOR4D_DECLARATION(output))
{
// Get pixels pointer
- Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH);
+ Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
int offset = 0;
diff --git a/src/core/CL/cl_kernels/common/tile.cl b/src/core/CL/cl_kernels/common/tile.cl
index 971750b7b2..4d8f802ea1 100644
--- a/src/core/CL/cl_kernels/common/tile.cl
+++ b/src/core/CL/cl_kernels/common/tile.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021, 2023 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,8 +50,8 @@ __kernel void tile(
TENSOR4D_DECLARATION(input),
TENSOR4D_DECLARATION(output))
{
- Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, DST_DEPTH);
- Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH);
+ Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output);
+ Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
// For all coordinates but x, each tile copies from the input
const int y = get_global_id(1);
diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h
index 87a1875f93..6e05a513ec 100644
--- a/src/core/CL/cl_kernels/helpers.h
+++ b/src/core/CL/cl_kernels/helpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2023 Arm Limited.
+ * Copyright (c) 2016-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_HELPER_H
-#define ARM_COMPUTE_HELPER_H
+#ifndef ACL_SRC_CORE_CL_CL_KERNELS_HELPERS_H
+#define ACL_SRC_CORE_CL_CL_KERNELS_HELPERS_H
#include "load_store_utility.h"
@@ -903,9 +903,9 @@
name##_stride_y, name##_step_y, name##_stride_z, name##_step_z, name##_stride_w, \
name##_step_w, mod_size)
-#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
- update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, \
- name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, mod_size)
+#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name) \
+ update_tensor4D_workitem_no_step_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
+ name##_stride_y, name##_stride_z, name##_stride_w)
#define CONVERT_TO_TENSOR3D_STRUCT_NO_UPDATE_PTR(name) \
tensor3D_ptr_no_update(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, \
@@ -1109,6 +1109,20 @@ inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr,
return tensor;
}
+inline Tensor4D update_tensor4D_workitem_no_step_ptr(
+ __global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint stride_y, uint stride_z, uint stride_w)
+{
+ Tensor4D tensor = {.ptr = ptr,
+ .offset_first_element_in_bytes = offset_first_element_in_bytes,
+ .stride_x = stride_x,
+ .stride_y = stride_y,
+ .stride_z = stride_z,
+ .stride_w = stride_w};
+
+ tensor.ptr += tensor.offset_first_element_in_bytes;
+ return tensor;
+}
+
/** Get the pointer position of a Vector
*
* @param[in] vec Pointer to the starting position of the buffer
@@ -1181,4 +1195,4 @@ inline __global const uchar *tensor3D_index2ptr(const Tensor3D *tensor, uint wid
tensor->offset_first_element_in_bytes;
}
-#endif // _HELPER_H
+#endif // ACL_SRC_CORE_CL_CL_KERNELS_HELPERS_H
diff --git a/src/core/CL/cl_kernels/nchw/batch_to_space.cl b/src/core/CL/cl_kernels/nchw/batch_to_space.cl
index a813715b89..d83e81347e 100644
--- a/src/core/CL/cl_kernels/nchw/batch_to_space.cl
+++ b/src/core/CL/cl_kernels/nchw/batch_to_space.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021, 2023 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,7 +62,7 @@ __kernel void batch_to_space_nchw(
VECTOR_DECLARATION(block_shape),
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
Vector block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
@@ -112,7 +112,7 @@ __kernel void batch_to_space_static_nchw(
const int batch_id,
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
const int block_x = BLOCK_SHAPE_X;
diff --git a/src/core/CL/cl_kernels/nchw/depth_to_space.cl b/src/core/CL/cl_kernels/nchw/depth_to_space.cl
index b9f223fe9d..57183393d2 100644
--- a/src/core/CL/cl_kernels/nchw/depth_to_space.cl
+++ b/src/core/CL/cl_kernels/nchw/depth_to_space.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,7 +54,7 @@ __kernel void depth_to_space_nchw(
TENSOR4D_DECLARATION(output))
{
Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
+ Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output);
const int r = (CHANNEL_SIZE / (BLOCK_SHAPE * BLOCK_SHAPE));
const int x = get_global_id(0);
@@ -66,4 +66,4 @@ __kernel void depth_to_space_nchw(
*((__global DATA_TYPE *)tensor4D_offset(&out, out_x, out_y, z, batch_id)) = *((__global DATA_TYPE *)in.ptr);
}
-#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE) \ No newline at end of file
+#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE)
diff --git a/src/core/CL/cl_kernels/nchw/space_to_batch.cl b/src/core/CL/cl_kernels/nchw/space_to_batch.cl
index e162a29bb0..91520213e8 100644
--- a/src/core/CL/cl_kernels/nchw/space_to_batch.cl
+++ b/src/core/CL/cl_kernels/nchw/space_to_batch.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,7 +64,7 @@ __kernel void space_to_batch_nchw(
const int batch_id,
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Image pad = CONVERT_TO_IMAGE_STRUCT_NO_STEP(paddings);
Vector block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
@@ -131,7 +131,7 @@ __kernel void space_to_batch_static_nchw(
const int batch_id,
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
int block_x = BLOCK_SHAPE_X;
diff --git a/src/core/CL/cl_kernels/nchw/space_to_depth.cl b/src/core/CL/cl_kernels/nchw/space_to_depth.cl
index aea02e813b..8097f65942 100644
--- a/src/core/CL/cl_kernels/nchw/space_to_depth.cl
+++ b/src/core/CL/cl_kernels/nchw/space_to_depth.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,7 +53,7 @@ __kernel void space_to_depth_nchw(
const int batch_id,
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
const int r = (CHANNEL_SIZE / (BLOCK_SHAPE * BLOCK_SHAPE));
@@ -66,4 +66,4 @@ __kernel void space_to_depth_nchw(
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, batch_id));
}
-#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE) \ No newline at end of file
+#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE)
diff --git a/src/core/CL/cl_kernels/nhwc/batch_to_space.cl b/src/core/CL/cl_kernels/nhwc/batch_to_space.cl
index 16141bcd2e..b910a753a6 100644
--- a/src/core/CL/cl_kernels/nhwc/batch_to_space.cl
+++ b/src/core/CL/cl_kernels/nhwc/batch_to_space.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021, 2023 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,7 +63,7 @@ __kernel void batch_to_space_nhwc(
TENSOR3D_DECLARATION(output))
{
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Vector block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
const int block_x = *((__global int *)vector_offset(&block, 0));
@@ -113,7 +113,7 @@ __kernel void batch_to_space_static_nhwc(
TENSOR3D_DECLARATION(output))
{
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
const int block_x = BLOCK_SHAPE_X;
const int block_y = BLOCK_SHAPE_Y;
@@ -128,4 +128,4 @@ __kernel void batch_to_space_static_nhwc(
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, in_batch));
}
-#endif // defined(DATA_TYPE) && defined(BATCH_SIZE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) \ No newline at end of file
+#endif // defined(DATA_TYPE) && defined(BATCH_SIZE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y)
diff --git a/src/core/CL/cl_kernels/nhwc/depth_to_space.cl b/src/core/CL/cl_kernels/nhwc/depth_to_space.cl
index 5464a4bef8..84f8aa7263 100644
--- a/src/core/CL/cl_kernels/nhwc/depth_to_space.cl
+++ b/src/core/CL/cl_kernels/nhwc/depth_to_space.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,7 +54,7 @@ __kernel void depth_to_space_nhwc(
TENSOR4D_DECLARATION(output))
{
Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
+ Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output);
const int r = (CHANNEL_SIZE / (BLOCK_SHAPE * BLOCK_SHAPE));
const int x = get_global_id(1);
@@ -66,4 +66,4 @@ __kernel void depth_to_space_nhwc(
*((__global DATA_TYPE *)tensor4D_offset(&out, z, out_x, out_y, batch_id)) = *((__global DATA_TYPE *)in.ptr);
}
-#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE) \ No newline at end of file
+#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE)
diff --git a/src/core/CL/cl_kernels/nhwc/space_to_batch.cl b/src/core/CL/cl_kernels/nhwc/space_to_batch.cl
index 785206e3b9..695bd4c217 100644
--- a/src/core/CL/cl_kernels/nhwc/space_to_batch.cl
+++ b/src/core/CL/cl_kernels/nhwc/space_to_batch.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,7 +64,7 @@ __kernel void space_to_batch_nhwc(
const int batch_id,
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Image pad = CONVERT_TO_IMAGE_STRUCT_NO_STEP(paddings);
Vector block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
@@ -130,7 +130,7 @@ __kernel void space_to_batch_static_nhwc(
const int batch_id,
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
int block_x = BLOCK_SHAPE_X;
@@ -152,4 +152,4 @@ __kernel void space_to_batch_static_nhwc(
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, w));
}
}
-#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN) \ No newline at end of file
+#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN)
diff --git a/src/core/CL/cl_kernels/nhwc/space_to_depth.cl b/src/core/CL/cl_kernels/nhwc/space_to_depth.cl
index d44e78d990..10aac6d5fb 100644
--- a/src/core/CL/cl_kernels/nhwc/space_to_depth.cl
+++ b/src/core/CL/cl_kernels/nhwc/space_to_depth.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,7 +53,7 @@ __kernel void space_to_depth_nhwc(
const int batch_id,
TENSOR3D_DECLARATION(output))
{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
const int r = (CHANNEL_SIZE / (BLOCK_SHAPE * BLOCK_SHAPE));
@@ -66,4 +66,4 @@ __kernel void space_to_depth_nhwc(
*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, batch_id));
}
-#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE) \ No newline at end of file
+#endif // defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE)
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp
index c11a18940a..904bb07282 100644
--- a/src/core/CL/kernels/CLGatherKernel.cpp
+++ b/src/core/CL/kernels/CLGatherKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021, 2023 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -112,8 +112,6 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context,
build_opts.add_option("-DDATA_TYPE=" +
get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2)));
- build_opts.add_option("-DINDICES_DIM_Z=" + support::cpp11::to_string(indices->info()->dimension(2)));
- build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2)));
build_opts.add_option("-DINDICES_DIMS=" + support::cpp11::to_string(indices->info()->num_dimensions()));
build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis));
build_opts.add_option("-DINDEX_LIMIT=" + support::cpp11::to_string(input->info()->tensor_shape()[_axis]));
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index a8f6112820..20cd835069 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021, 2023 Arm Limited.
+ * Copyright (c) 2018-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -142,8 +142,6 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context,
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(
std::max<int>(output_width_x - vec_size_x, 0)));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
- build_opts.add_option_if_else(input_shape.num_dimensions() > 2,
- "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()), "-DSRC_DEPTH=1");
build_opts.add_option_if_else(output->num_dimensions() > 2,
"-DDST_DEPTH=" + support::cpp11::to_string(output->tensor_shape().z()),
"-DDST_DEPTH=1");