author    Gian Marco Iodice <gianmarco.iodice@arm.com>    2022-12-30 16:07:45 +0000
committer Gian Marco Iodice <gianmarco.iodice@arm.com>    2023-01-10 09:57:51 +0000
commit    3cce35dcad8bc8f53a1e6613f719af9ab04feda6 (patch)
tree      e1015566852ebce4af897db37cf5cb1989c29924
parent    d2d9361a0a338bce478f7d85b4af70d1ed20f26c (diff)
download  ComputeLibrary-3cce35dcad8bc8f53a1e6613f719af9ab04feda6.tar.gz
Extend cl image support to input and output tensors
- Add support for texture image to input and output of direct convolution
- Extend T_LOAD2D_INDIRECT macro to read values from cl image storages

Resolves COMPMID-5715

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Idb0410f53f6d0763cd9e39895a7cbf9bc826d33a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8904
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/core/KernelDescriptors.h                                   4
-rw-r--r--  src/core/CL/CLUtils.cpp                                               18
-rw-r--r--  src/core/CL/CLUtils.h                                                 12
-rw-r--r--  src/core/CL/cl_kernels/helpers.h                                      28
-rw-r--r--  src/core/CL/cl_kernels/nhwc/direct_convolution.cl                     42
-rw-r--r--  src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl                     24
-rw-r--r--  src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl              21
-rw-r--r--  src/core/CL/cl_kernels/nhwc/indirect_convolution.cl                   40
-rw-r--r--  src/core/CL/cl_kernels/nhwc/scale.cl                                  64
-rw-r--r--  src/core/CL/cl_kernels/nhwc/transposed_convolution.cl                 14
-rw-r--r--  src/core/CL/cl_kernels/tile_helpers.h                                 86
-rw-r--r--  src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp        9
-rw-r--r--  src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp                  8
-rw-r--r--  src/gpu/cl/kernels/ClDirectConv2dKernel.cpp                           67
-rw-r--r--  src/gpu/cl/kernels/ClDirectConv2dKernel.h                              6
-rw-r--r--  src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp              4
-rw-r--r--  src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp       4
-rw-r--r--  src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp   4
-rw-r--r--  src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp                          4
19 files changed, 312 insertions, 147 deletions
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 4a64032b14..19ac254c04 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -117,6 +117,8 @@ struct DirectConvComputeKernelInfo
int32_t n0{ 1 }; /**< Number of columns to be processed by the kernel */
int32_t k0{ 1 }; /**< Number of partial accumulations to be processed in a single iteration by the kernel */
bool export_weights_to_cl_image{ false }; /**< Flag to export the weights to cl_image */
+ bool export_output_to_cl_image{ false }; /**< Flag to export the output to cl_image */
+ bool export_input_to_cl_image{ false }; /**< Flag to export the input to cl_image */
};
/** Descriptor used by the softmax kernels */
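As a quick orientation for the descriptor change above, here is a minimal host-side sketch (C++) of how a caller could populate the extended DirectConvComputeKernelInfo. The field values chosen below are illustrative assumptions only; the heuristics that actually pick them are not part of this patch.

// Illustrative sketch, not library code: the values are assumptions, not the library heuristics.
#include "arm_compute/core/KernelDescriptors.h"

arm_compute::DirectConvComputeKernelInfo make_example_desc()
{
    arm_compute::DirectConvComputeKernelInfo desc{};
    desc.n0                         = 4;     // columns processed per work-item
    desc.k0                         = 4;     // partial accumulations per iteration
    desc.export_weights_to_cl_image = true;  // pre-existing flag
    desc.export_input_to_cl_image   = false; // new flag introduced by this patch
    desc.export_output_to_cl_image  = false; // new flag introduced by this patch
    return desc;
}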
diff --git a/src/core/CL/CLUtils.cpp b/src/core/CL/CLUtils.cpp
index 8f39c2d700..84cf88e099 100644
--- a/src/core/CL/CLUtils.cpp
+++ b/src/core/CL/CLUtils.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,7 +32,7 @@
namespace arm_compute
{
-cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch)
+cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch, CLImage2DType type)
{
cl_channel_type cl_data_type;
@@ -61,7 +61,17 @@ cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer
desc.image_width = shape2d[0];
desc.image_height = shape2d[1];
- cl_image = clCreateImage(ctx(), CL_MEM_READ_ONLY, &format, &desc, nullptr, &err);
+ switch(type)
+ {
+ case CLImage2DType::ReadOnly:
+ cl_image = clCreateImage(ctx(), CL_MEM_READ_ONLY, &format, &desc, nullptr, &err);
+ break;
+ case CLImage2DType::WriteOnly:
+ cl_image = clCreateImage(ctx(), CL_MEM_WRITE_ONLY, &format, &desc, nullptr, &err);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported CLImage2DType");
+ }
ARM_COMPUTE_UNUSED(err);
ARM_COMPUTE_ERROR_ON_MSG(err != CL_SUCCESS, "Error during the creation of CL image from buffer");
@@ -176,4 +186,4 @@ void PostOpCLKernelUtils::set_post_ops_cl_kernel_name(std::string &kernel_name,
}
} // namespace experimental
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/CLUtils.h b/src/core/CL/CLUtils.h
index d133e4fe6f..b31944c72f 100644
--- a/src/core/CL/CLUtils.h
+++ b/src/core/CL/CLUtils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,6 +34,13 @@ class TensorShape;
class CLBuildOptions;
class ITensorInfo;
+/** OpenCL Image2D types */
+enum class CLImage2DType
+{
+ ReadOnly,
+ WriteOnly
+};
+
/** Create a cl::Image2D object from an OpenCL buffer
*
* @note The following conditions are required to create a OpenCL image object from OpenCL buffer,
@@ -49,10 +56,11 @@ class ITensorInfo;
* @param[in] shape2d 2D tensor shape
* @param[in] data_type DataType to use. Only supported: F32,F16
* @param[in] image_row_pitch Image row pitch (a.k.a. stride Y) to be used in the image2d object
+ * @param[in] image_type Image 2D type (@ref CLImage2DType)
*
* @return cl::Image2D object
*/
-cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch);
+cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch, CLImage2DType image_type);
namespace experimental
{
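To show the new parameter in context, a hedged host-side sketch (C++) of calling the extended create_image2d_from_buffer() overload. The helper name export_output_as_image() and the way the tensor is obtained are assumptions; only the signature and the CLImage2DType enumerators come from this patch.

// Sketch only: export_output_as_image is a hypothetical helper, not part of the library.
#include "src/core/CL/CLUtils.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorShape.h"

using namespace arm_compute;

cl::Image2D export_output_as_image(const ICLTensor *dst)
{
    // NHWC tensor viewed as a 2D image: 4 channel values per pixel, W * H * N rows.
    const size_t      image_w         = dst->info()->dimension(0) / 4;
    const size_t      image_h         = dst->info()->dimension(1) * dst->info()->dimension(2) * dst->info()->dimension(3);
    const size_t      image_row_pitch = dst->info()->strides_in_bytes()[1];
    const TensorShape shape2d(image_w, image_h);

    // CLImage2DType::WriteOnly is new in this patch; ReadOnly preserves the previous behaviour.
    return create_image2d_from_buffer(CLKernelLibrary::get().context(), dst->cl_buffer(),
                                      shape2d, dst->info()->data_type(), image_row_pitch,
                                      CLImage2DType::WriteOnly);
}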
diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h
index 298edc244f..c5d94ccb04 100644
--- a/src/core/CL/cl_kernels/helpers.h
+++ b/src/core/CL/cl_kernels/helpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2022 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -438,6 +438,16 @@
#define read_image2d_halfx4(img, x_coord, y_coord) (half16)(read_imageh(img, (int2)(x_coord, y_coord)), read_imageh(img, (int2)(x_coord + 1, y_coord)), read_imageh(img, (int2)(x_coord + 2, y_coord)), read_imageh(img, (int2)(x_coord + 3, y_coord)));
#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
+#define write_image2d_floatx1(img, x_coord, y_coord, values) (write_imagef(img, (int2)(x_coord, y_coord), values));
+#define write_image2d_floatx2(img, x_coord, y_coord, values) (write_imagef(img, (int2)(x_coord, y_coord), values.s0123), write_imagef(img, (int2)(x_coord + 1, y_coord), values.s4567));
+#define write_image2d_floatx4(img, x_coord, y_coord, values) (write_imagef(img, (int2)(x_coord, y_coord), values.s0123), write_imagef(img, (int2)(x_coord + 1, y_coord), values.s4567), write_imagef(img, (int2)(x_coord + 2, y_coord), values.s89AB), write_imagef(img, (int2)(x_coord + 3, y_coord), values.sCDEF));
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
+#define write_image2d_halfx1(img, x_coord, y_coord, values) (write_imageh(img, (int2)(x_coord, y_coord), values));
+#define write_image2d_halfx2(img, x_coord, y_coord, values) (write_imageh(img, (int2)(x_coord, y_coord), values.s0123), write_imageh(img, (int2)(x_coord + 1, y_coord), values.s4567));
+#define write_image2d_halfx4(img, x_coord, y_coord, values) (write_imageh(img, (int2)(x_coord, y_coord), values.s0123), write_imageh(img, (int2)(x_coord + 1, y_coord), values.s4567), write_imageh(img, (int2)(x_coord + 2, y_coord), values.s89AB), write_imageh(img, (int2)(x_coord + 3, y_coord), values.sCDEF));
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
+
/** Utility macro to read a 2D OpenCL image object.
*
* @note Coordinates are not normalized
@@ -454,6 +464,22 @@
#define READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord) read_image2d_##data_type##x##n0(img, x_coord, y_coord)
#define READ_IMAGE2D(data_type, n0, img, x_coord, y_coord) READ_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord)
+/** Utility macro to write a 2D OpenCL image object.
+ *
+ * @note Coordinates are not normalized
+ *
+ * @param[in] data_type Data type
+ * @param[in] n0 Number of pixels to write. Only 1, 2 and 4 are supported
+ * @param[in] img OpenCL image object
+ * @param[in] x_coord The x coordinate for the top-left pixel
+ * @param[in] y_coord The y coordinate for the top-left pixel
+ * @param[in] values Values to write
+ *
+ * @{
+ */
+#define WRITE_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord, values) write_image2d_##data_type##x##n0(img, x_coord, y_coord, values)
+#define WRITE_IMAGE2D(data_type, n0, img, x_coord, y_coord, values) WRITE_IMAGE2D_STR(data_type, n0, img, x_coord, y_coord, values)
+
#define VSTORE_STR(size) vstore##size
#define VSTORE(size) VSTORE_STR(size)
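A kernel-side sketch (OpenCL C) of the new WRITE_IMAGE2D macro. The kernel name and coordinates are placeholders; with data_type=float and n0=4 the macro resolves to write_image2d_floatx4, i.e. four consecutive write_imagef() calls consuming one float16.

#include "helpers.h"

// Sketch only: one work-item writes four consecutive image pixels (16 float values).
__kernel void write_image2d_example(__write_only image2d_t img)
{
    const int x = get_global_id(0) * 4; // x coordinate (in pixels) of the leftmost pixel written
    const int y = get_global_id(1);

    float16 values = (float16)(1.0f);

    // Expands to write_image2d_floatx4(img, x, y, values)
    WRITE_IMAGE2D(float, 4, img, x, y, values);
}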
diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
index 4693a1fbcd..81ceeb8846 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,36 +66,36 @@
* - The weights offset e.g. -DWEI_OFFSET=4
* - The quantized zero value e.g. -DZERO_VALUE=4
*
+ * @param[in] src_img (Not supported) Read only cl_image object for the source tensor. Included when SRC_TENSOR_TYPE=IMAGE
* @param[in] src_ptr Pointer to the source tensor. Supported data type: F16/F32/QASYMM8
- * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] src_step_w src_stride_w * number of elements along W processed per workitem(in bytes)
+ * @param[in] src_c The size of the channels dimension of the source tensor
+ * @param[in] src_w The size of the width dimension of the source tensor
+ * @param[in] src_h The size of the height dimension of the source tensor
+ * @param[in] src_n The size of the batches dimension of the source tensor
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_img (Not supported) Write only cl_image object for the destination tensor. Included when DST_TENSOR_TYPE=IMAGE
* @param[out] dst_ptr Pointer to the destination tensor. Supported data type: same as @p src_ptr
- * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] dst_step_w dst_stride_w * number of elements along W processed per workitem(in bytes)
+ * @param[in] dst_c The size of the channels dimension of the destination tensor
+ * @param[in] dst_w The size of the width dimension of the destination tensor
+ * @param[in] dst_h The size of the height dimension of the destination tensor
+ * @param[in] dst_n The size of the batches dimension of the destination tensor
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] wei_img (Optional) Read only cl_image object for the weights tensor. Included when WEI_TENSOR_TYPE=IMAGE
* @param[in] wei_ptr Pointer to the weights tensor. Supported data type: same as @p src_ptr
- * @param[in] wei_stride_x Stride of the weights tensor in X dimension (in bytes)
- * @param[in] wei_step_x wei_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] wei_stride_y Stride of the weights tensor in Y dimension (in bytes)
- * @param[in] wei_step_y wei_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] wei_stride_z Stride of the weights tensor in Z dimension (in bytes)
- * @param[in] wei_step_z wei_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] wei_stride_w Stride of the weights tensor in W dimension (in bytes)
- * @param[in] wei_step_w wei_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] wei_offset_first_element_in_bytes The offset of the first element in the bias matrix
+ * @param[in] wei_c The size of the channels dimension of the weights tensor
+ * @param[in] wei_w The size of the width dimension of the weights tensor
+ * @param[in] wei_h The size of the height dimension of the weights tensor
+ * @param[in] wei_n The size of the batches dimension of the weights tensor
+ * @param[in] wei_offset_first_element_in_bytes The offset of the first element in the weights matrix
* @param[in] bia_ptr (Optional) Pointer to the bias tensor Supported data type: same as @p src_ptr (if F32/F16) or S32 (if QASYMM8/QASYMM8_SIGNED)
* @param[in] bia_stride_x (Optional) Stride of the bias tensor in X dimension (in bytes)
* @param[in] bia_step_x (Optional) bia_stride_x * number of elements along X processed per workitem(in bytes)
@@ -103,9 +103,9 @@
*/
//! @endcond
__kernel void direct_convolution_nhwc(
- TENSOR4D_T(src, SRC_TENSOR_TYPE),
- TENSOR4D_T(dst, DST_TENSOR_TYPE),
- TENSOR4D_T(wei, WEI_TENSOR_TYPE)
+ TENSOR4D_RO_T(src, SRC_TENSOR_TYPE),
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE),
+ TENSOR4D_RO_T(wei, WEI_TENSOR_TYPE)
#if defined(HAS_BIAS)
,
VECTOR_DECLARATION(bia)
@@ -292,4 +292,4 @@ __kernel void direct_convolution_nhwc(
#undef _IDST_HEIGHT
#undef _IDST_CHANNELS
#undef _IY_MULTIPLIER
-}
\ No newline at end of file
+}
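For readability, an approximate expansion (OpenCL C) of the updated kernel prototype when it is built with -DSRC_TENSOR_TYPE=IMAGE, -DDST_TENSOR_TYPE=IMAGE and -DWEI_TENSOR_TYPE=IMAGE. The buffer components come from TENSOR4D_T_BUFFER in tile_helpers.h and are abbreviated here; see the tile_helpers.h hunks further down for the dispatch macros.

// Approximate expansion, for illustration only; buffer components abbreviated.
__kernel void direct_convolution_nhwc(
    __read_only image2d_t  src_img,  // prepended by TENSOR4D_RO_T when SRC_TENSOR_TYPE=IMAGE
    __global uchar        *src_ptr,
    /* ... src strides, sizes, offset ... */
    __write_only image2d_t dst_img,  // prepended by TENSOR4D_WO_T when DST_TENSOR_TYPE=IMAGE
    __global uchar        *dst_ptr,
    /* ... dst strides, sizes, offset ... */
    __read_only image2d_t  wei_img,  // prepended by TENSOR4D_RO_T when WEI_TENSOR_TYPE=IMAGE
    __global uchar        *wei_ptr
    /* ... wei strides, sizes, offset, then the optional bias vector ... */)
{
    // body omitted
}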
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
index 345469063a..dcbae220b6 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,6 +54,7 @@
* @note The number of rows to read from the src tensor must be passed at compile time using -DM0_A (e.g., -DM0_A=3). M0_A must be equal to WEI_WIDTH + (M0 - 1)
* @note The number of columns to read from the src tensor must be passed at compile time using -DN0_A. It can either be 1 (for DEPTH_MULTIPLIER > 1) or N0 (for DEPTH_MULTIPLIER == 1)
*
+ * @param[in] src_img (Not supported) Read only cl_image object for the source tensor. Included when SRC_TENSOR_TYPE=IMAGE
* @param[in] src_ptr Pointer to the source tensor. Supported data type: F16/F32
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
@@ -63,6 +64,7 @@
* @param[in] src_h The size of the height dimension of the source tensor
* @param[in] src_n The size of the batches dimension of the source tensor
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_img (Not supported) Write only cl_image object for the destination tensor. Included when DST_TENSOR_TYPE=IMAGE
* @param[out] dst_ptr Pointer to the destination tensor. Supported data type: same as @p src_ptr
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
@@ -72,16 +74,16 @@
* @param[in] dst_h The size of the height dimension of the destination tensor
* @param[in] dst_n The size of the batches dimension of the destination tensor
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] wei_img (Optional) Read only cl_image object for the weights tensor. Included when WEI_TENSOR_TYPE=IMAGE
* @param[in] wei_ptr Pointer to the weights tensor. Supported data type: same as @p src_ptr
- * @param[in] wei_stride_x Stride of the weights tensor in X dimension (in bytes)
- * @param[in] wei_step_x wei_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] wei_stride_y Stride of the weights tensor in Y dimension (in bytes)
- * @param[in] wei_step_y wei_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] wei_stride_z Stride of the weights tensor in Z dimension (in bytes)
- * @param[in] wei_step_z wei_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] wei_stride_w Stride of the weights tensor in W dimension (in bytes)
- * @param[in] wei_step_w wei_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] wei_offset_first_element_in_bytes The offset of the first element in the bias matrix
+ * @param[in] wei_c The size of the channels dimension of the weights tensor
+ * @param[in] wei_w The size of the width dimension of the weights tensor
+ * @param[in] wei_h The size of the height dimension of the weights tensor
+ * @param[in] wei_n The size of the batches dimension of the weights tensor
+ * @param[in] wei_offset_first_element_in_bytes The offset of the first element in the weights matrix
* @param[in] bia_ptr (Optional) Pointer to the bias tensor Supported data type: same as @p src_ptr
* @param[in] bia_stride_x (Optional) Stride of the bias tensor in X dimension (in bytes)
* @param[in] bia_step_x (Optional) bia_stride_x * number of elements along X processed per workitem(in bytes)
@@ -89,9 +91,9 @@
*/
//! @endcond
__kernel void dwc_native_fp_nhwc(
- TENSOR4D_T(src, SRC_TENSOR_TYPE),
- TENSOR4D_T(dst, DST_TENSOR_TYPE),
- TENSOR4D(wei, WEI_TENSOR_TYPE)
+ TENSOR4D_RO_T(src, SRC_TENSOR_TYPE),
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE),
+ TENSOR4D_RO_T(wei, WEI_TENSOR_TYPE)
#if defined(HAS_BIAS)
,
VECTOR_DECLARATION(bia)
@@ -206,4 +208,4 @@ __kernel void dwc_native_fp_nhwc(
}
#endif // defined(WEI_WIDTH) && defined(WEI_HEIGHT) && defined(N0) && defined(M0) && defined(DILATION_X) && defined(DILATION_Y) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP)
// *INDENT-ON*
-// clang-format on
\ No newline at end of file
+// clang-format on
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
index e2ffd444dd..2d255e5b61 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,7 @@
* @note The number of rows to read from the src tensor must be passed at compile time using -DM0_A (e.g., -DM0_A=3). M0_A must be equal to WEI_WIDTH + (M0 - 1)
* @note The number of columns to read from the src tensor must be passed at compile time using -DN0_A. It can either be 1 (for DEPTH_MULTIPLIER > 1) or N0 (for DEPTH_MULTIPLIER == 1)
*
+ * @param[in] src_img (Not supported) Read only cl_image object for the source tensor. Included when SRC_TENSOR_TYPE=IMAGE
* @param[in] src_ptr Pointer to the source tensor. Supported data type: QSYMM8/QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
@@ -86,6 +87,7 @@
* @param[in] src_h The size of the height dimension of the source tensor
* @param[in] src_n The size of the batches dimension of the source tensor
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_img (Not supported) Write only cl_image object for the destination tensor. Included when DST_TENSOR_TYPE=IMAGE
* @param[out] dst_ptr Pointer to the destination tensor. Supported data type: same as @p src_ptr
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
@@ -95,14 +97,15 @@
* @param[in] dst_h The size of the height dimension of the destination tensor
* @param[in] dst_n The size of the batches dimension of the destination tensor
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] wei_img (Not supported) Read only cl_image object for the weights tensor. Included when WEI_TENSOR_TYPE=IMAGE
* @param[in] wei_ptr Pointer to the weights tensor. Supported data type: same as @p src_ptr
- * @param[in] wei_stride_x Stride of the weights tensor in X dimension (in bytes)
- * @param[in] wei_step_x wei_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] wei_stride_y Stride of the weights tensor in Y dimension (in bytes)
- * @param[in] wei_step_y wei_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] wei_stride_z Stride of the weights tensor in Z dimension (in bytes)
- * @param[in] wei_step_z wei_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] wei_stride_w Stride of the weights tensor in W dimension (in bytes)
+ * @param[in] wei_c The size of the channels dimension of the weights tensor
+ * @param[in] wei_w The size of the width dimension of the weights tensor
+ * @param[in] wei_h The size of the height dimension of the weights tensor
+ * @param[in] wei_n The size of the batches dimension of the weights tensor
* @param[in] wei_step_w wei_stride_w * number of elements along W processed per workitem(in bytes)
* @param[in] wei_offset_first_element_in_bytes The offset of the first element in the weights tensor
* @param[in] dst_multipliers_ptr Pointer to the destination multipliers tensor for the per-channel quantization. Supported data type: S32
@@ -120,9 +123,9 @@
*/
//! @endcond
__kernel void dwc_native_quantized_nhwc(
- TENSOR4D_T(src, SRC_TENSOR_TYPE),
- TENSOR4D_T(dst, DST_TENSOR_TYPE),
- TENSOR4D(wei, WEI_TENSOR_TYPE),
+ TENSOR4D_RO_T(src, SRC_TENSOR_TYPE),
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE),
+ TENSOR4D_RO_T(wei, WEI_TENSOR_TYPE),
VECTOR_DECLARATION(dst_multipliers),
VECTOR_DECLARATION(dst_shifts)
#if defined(HAS_BIAS)
@@ -269,4 +272,4 @@ __kernel void dwc_native_quantized_nhwc(
}
#endif // defined(WEI_WIDTH) && defined(WEI_HEIGHT) && defined(N0) && defined(M0) && defined(DILATION_X) && defined(DILATION_Y) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP)
// *INDENT-ON*
-// clang-format on
\ No newline at end of file
+// clang-format on
diff --git a/src/core/CL/cl_kernels/nhwc/indirect_convolution.cl b/src/core/CL/cl_kernels/nhwc/indirect_convolution.cl
index c88f0034c5..aa719bfef0 100644
--- a/src/core/CL/cl_kernels/nhwc/indirect_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/indirect_convolution.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@
* @note The number of M0 rows (width*height) to process must be passed at compile time using -DM0 (e.g. -DM0=2)
* - M0 = 1, 2, 3, 4, 5, 6, 7, and 8
*
- * @param[out] dst_img (Not supported) CLImage object to the destination tensor (DST_TENSOR_TYPE=IMAGE only)
+ * @param[out] dst_img (Not supported) Write only cl_image object for the destination tensor. Included when DST_TENSOR_TYPE=IMAGE
* @param[out] dst_ptr Pointer to the destination tensor. Supported data type: INT32
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
@@ -54,7 +54,7 @@
*/
//! @endcond
__kernel void indirect_convolution_address_precalculation(
- TENSOR4D_T(dst, DST_TENSOR_TYPE))
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE))
{
const int x = get_global_id(0);
const int y = get_global_id(1);
@@ -123,7 +123,7 @@ __kernel void indirect_convolution_address_precalculation(
* - N0 = 2, 3, 4, 8, 16
* - K0 = 2, 3, 4, 8, 16 (only 4, 8 and 16 if WEI_TENSOR_TYPE=IMAGE)
*
- * @param[in] src_img (Not supported) CLImage object to the source tensor (SRC_TENSOR_TYPE=IMAGE only)
+ * @param[in] src_img (Not supported) Read only cl_image object for the source tensor. Included when SRC_TENSOR_TYPE=IMAGE
* @param[in] src_ptr Pointer to the source tensor. Supported data type: F16/F32
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
@@ -133,7 +133,7 @@ __kernel void indirect_convolution_address_precalculation(
* @param[in] src_h The size of the height dimension of the source tensor
* @param[in] src_n The size of the batches dimension of the source tensor
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] off_img (Not supported) CLImage object to the indirect buffer tensor (OFF_TENSOR_TYPE=IMAGE only)
+ * @param[in] off_img (Not supported) Read only cl_image object for the indirect buffer tensor. Included when OFF_TENSOR_TYPE=IMAGE
* @param[in] off_ptr Pointer to the indirect buffer tensor. Supported data type: INT32
* @param[in] off_stride_y Stride of the indirect buffer tensor in Y dimension (in bytes)
* @param[in] off_stride_z Stride of the indirect buffer tensor in Z dimension (in bytes)
@@ -143,8 +143,8 @@ __kernel void indirect_convolution_address_precalculation(
* @param[in] off_h The size of the height dimension of the indirect buffer tensor
* @param[in] off_n The size of the batches dimension of the indirect buffer tensor
* @param[in] off_offset_first_element_in_bytes The offset of the first element in the indirect buffer tensor
- * @param[out] dst_img (Not supported) CLImage object to the destination tensor (DST_TENSOR_TYPE=IMAGE only)
- * @param[out] dst_ptr Pointer to the destination tensor. Supported data type: same as the input tensor
+ * @param[out] dst_img (Not supported) Write only cl_image object for the destination tensor. Included when DST_TENSOR_TYPE=IMAGE
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data type: same as @p src_ptr
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
* @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
@@ -153,8 +153,8 @@ __kernel void indirect_convolution_address_precalculation(
* @param[in] dst_h The size of the height dimension of the destination tensor
* @param[in] dst_n The size of the batches dimension of the destination tensor
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[out] wei_img (Optional) CLImage object to the destination tensor (WEI_TENSOR_TYPE=IMAGE only)
- * @param[out] wei_ptr Pointer to the weights tensor. Supported data type: same as the input tensor
+ * @param[out] wei_img (Optional) Read only cl_image object for the weights tensor. Included when WEI_TENSOR_TYPE=IMAGE
+ * @param[out] wei_ptr Pointer to the weights tensor. Supported data type: same as @p src_ptr
* @param[in] wei_stride_y Stride of the weights tensor in Y dimension (in bytes)
* @param[in] wei_stride_z Stride of the weights tensor in Z dimension (in bytes)
* @param[in] wei_stride_w Stride of the weights tensor in W dimension (in bytes)
@@ -163,23 +163,17 @@ __kernel void indirect_convolution_address_precalculation(
* @param[in] wei_h The size of the height dimension of the weights tensor
* @param[in] wei_n The size of the batches dimension of the weights tensor
* @param[in] wei_offset_first_element_in_bytes The offset of the first element in the weights tensor
- * @param[out] bia_img (Not supported) CLImage object to the destination tensor (BIA_TENSOR_TYPE=IMAGE only)
- * @param[out] bia_ptr (Optional) Pointer to the bias tensor. Supported data type: same as the input tensor
- * @param[in] bia_stride_y (Optional) Stride of the bias tensor in Y dimension (in bytes)
- * @param[in] bia_stride_z (Optional) Stride of the bias tensor in Z dimension (in bytes)
- * @param[in] bia_stride_w (Optional) Stride of the bias tensor in W dimension (in bytes)
- * @param[in] bia_c (Optional) The size of the channels dimension of the bias tensor
- * @param[in] bia_w (Optional) The size of the width dimension of the bias tensor
- * @param[in] bia_h (Optional) The size of the height dimension of the bias tensor
- * @param[in] bia_n (Optional) The size of the batches dimension of the bias tensor
- * @param[in] bia_offset_first_element_in_bytes (Optional) The offset of the first element in the bias tensor
+ * @param[in] bia_ptr (Optional) Pointer to the bias tensor Supported data type: same as @p src_ptr
+ * @param[in] bia_stride_x (Optional) Stride of the bias tensor in X dimension (in bytes)
+ * @param[in] bia_step_x (Optional) bia_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] bia_offset_first_element_in_bytes (Optional) The offset of the first element in the bias matrix
*/
//! @endcond
__kernel void indirect_convolution_nhwc(
- TENSOR4D_T(src, SRC_TENSOR_TYPE),
- TENSOR4D_T(off, OFF_TENSOR_TYPE),
- TENSOR4D_T(dst, DST_TENSOR_TYPE),
- TENSOR4D_T(wei, WEI_TENSOR_TYPE)
+ TENSOR4D_RO_T(src, SRC_TENSOR_TYPE),
+ TENSOR4D_RO_T(off, OFF_TENSOR_TYPE),
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE),
+ TENSOR4D_RO_T(wei, WEI_TENSOR_TYPE)
#if defined(HAS_BIAS)
,
VECTOR_DECLARATION(bia)
diff --git a/src/core/CL/cl_kernels/nhwc/scale.cl b/src/core/CL/cl_kernels/nhwc/scale.cl
index f6a3e0971b..e071b0f192 100644
--- a/src/core/CL/cl_kernels/nhwc/scale.cl
+++ b/src/core/CL/cl_kernels/nhwc/scale.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2022 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,8 +61,8 @@
*/
//! @endcond
__kernel void scale_nearest_neighbour_nhwc(
- TENSOR4D_T(src, SRC_TENSOR_TYPE),
- TENSOR4D_T(dst, DST_TENSOR_TYPE),
+ TENSOR4D_RO_T(src, SRC_TENSOR_TYPE),
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE),
const float scale_x,
const float scale_y)
{
@@ -128,31 +128,33 @@ __kernel void scale_nearest_neighbour_nhwc(
* - The source offset e.g. -DOFFSET=4
* - The source scale e.g. -DSCALE=4
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32.
- * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] src_c The size of the channels dimension of the source tensor
- * @param[in] src_w The size of the width dimension of the source tensor
- * @param[in] src_h The size of the height dimension of the source tensor
- * @param[in] src_n The size of the batches dimension of the source tensor
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: U8/S16/F16/F32.
- * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] dst_c The size of the channels dimension of the destination tensor
- * @param[in] dst_w The size of the width dimension of the destination tensor
- * @param[in] dst_h The size of the height dimension of the destination tensor
- * @param[in] dst_n The size of the batches dimension of the destination tensor
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] scale_x The scale value to apply on the source width
- * @param[in] scale_y The scale value to apply on the source height
+ * @param[in] src_img (Not supported) Read only cl_image object for the source tensor. Included when SRC_TENSOR_TYPE=IMAGE
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32.
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
+ * @param[in] src_c The size of the channels dimension of the source tensor
+ * @param[in] src_w The size of the width dimension of the source tensor
+ * @param[in] src_h The size of the height dimension of the source tensor
+ * @param[in] src_n The size of the batches dimension of the source tensor
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_img (Not supported) Write only cl_image object for the destination tensor. Included when DST_TENSOR_TYPE=IMAGE
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: U8/S16/F16/F32.
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
+ * @param[in] dst_c The size of the channels dimension of the destination tensor
+ * @param[in] dst_w The size of the width dimension of the destination tensor
+ * @param[in] dst_h The size of the height dimension of the destination tensor
+ * @param[in] dst_n The size of the batches dimension of the destination tensor
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] scale_x The scale value to apply on the source width
+ * @param[in] scale_y The scale value to apply on the source height
*/
//! @endcond
__kernel void scale_bilinear_nhwc(
- TENSOR4D_T(src, SRC_TENSOR_TYPE),
- TENSOR4D_T(dst, DST_TENSOR_TYPE),
+ TENSOR4D_RO_T(src, SRC_TENSOR_TYPE),
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE),
const float scale_x,
const float scale_y)
{
@@ -224,11 +226,11 @@ __kernel void scale_bilinear_nhwc(
const float a1 = (yi_f - (float)yi);
const float b1 = (1.f - a1);
- out[0].v = CONVERT_SAT((CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
- (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
- (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
- (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1),
- VEC_DATA_TYPE(DST_DATA_TYPE, N0));
+ out[0].v = CONVERT_SAT((CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
+ (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
+ (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
+ (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1),
+ VEC_DATA_TYPE(DST_DATA_TYPE, N0));
#endif // defined(IS_FLOATING_POINT)
TILE(uint, 1, 1, dst_indirect_y);
@@ -240,4 +242,4 @@ __kernel void scale_bilinear_nhwc(
T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, 1, N0, PARTIAL_N0, DST_TENSOR_TYPE, dst, cout, dst_stride_y, x_cond, out, dst_indirect_y);
}
-#endif /* SCALE_BILINEAR */
\ No newline at end of file
+#endif /* SCALE_BILINEAR */
diff --git a/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl b/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
index fe6182fc95..1393537283 100644
--- a/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,7 +64,7 @@
* - The weights offset e.g. -DWEI_OFFSET=4
* - The quantized zero value e.g. -DZERO_VALUE=4
*
- *
+ * @param[in] src_img (Not supported) Read only cl_image object for the source tensor. Included when SRC_TENSOR_TYPE=IMAGE
* @param[in] src_ptr Pointer to the source tensor. Supported data type: F16/F32
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
@@ -73,6 +73,7 @@
* @param[in] src_w The size of the width dimension of the source tensor
* @param[in] src_h The size of the height dimension of the source tensor
* @param[in] src_n The size of the batches dimension of the source tensor
+ * @param[out] dst_img (Not supported) Write only cl_image object for the destination tensor. Included when DST_TENSOR_TYPE=IMAGE
* @param[out] dst_ptr Pointer to the destination tensor. Supported data type: same as @p src_ptr
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
@@ -82,6 +83,7 @@
* @param[in] dst_h The size of the height dimension of the destination tensor
* @param[in] dst_n The size of the batches dimension of the destination tensor
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] wei_img (Not supported) Read only cl_image object for the weights tensor. Included when WEI_TENSOR_TYPE=IMAGE
* @param[in] wei_ptr Pointer to the weights tensor. Supported data type: same as @p src_ptr
* @param[in] wei_stride_y Stride of the weights tensor in Y dimension (in bytes)
* @param[in] wei_stride_z Stride of the weights tensor in Z dimension (in bytes)
@@ -98,9 +100,9 @@
*/
//! @endcond
__kernel void transposed_convolution_nhwc(
- TENSOR4D_T(src, SRC_TENSOR_TYPE),
- TENSOR4D_T(dst, DST_TENSOR_TYPE),
- TENSOR4D_T(wei, WEI_TENSOR_TYPE)
+ TENSOR4D_RO_T(src, SRC_TENSOR_TYPE),
+ TENSOR4D_WO_T(dst, DST_TENSOR_TYPE),
+ TENSOR4D_RO_T(wei, WEI_TENSOR_TYPE)
#if defined(HAS_BIAS)
,
VECTOR_DECLARATION(bia)
@@ -292,4 +294,4 @@ __kernel void transposed_convolution_nhwc(
#undef _IDST_HEIGHT
#undef _IDST_CHANNELS
#undef _IY_MULTIPLIER
-}
\ No newline at end of file
+}
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index acc174d04f..507e172dfb 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -130,8 +130,44 @@
uint name##_offset_first_element_in_bytes
#define TENSOR4D_T_STR(name, type) TENSOR4D_T_##type(name)
+
+/** Legacy tensor 4D arguments
+ *
+ * @param[in] name Tensor name. The tensor name is the prefix of the tensor components
+ * @param[in] type Tensor type (BUFFER or IMAGE)
+ */
#define TENSOR4D_T(name, type) TENSOR4D_T_STR(name, type)
+#define TENSOR4D_RO_T_IMAGE(name) \
+ __read_only image2d_t name##_img, \
+ TENSOR4D_T_BUFFER(name)
+
+#define TENSOR4D_RO_T_BUFFER(name) TENSOR4D_T_BUFFER(name)
+
+#define TENSOR4D_RO_T_STR(name, type) TENSOR4D_RO_T_##type(name)
+
+/** Read-Only (RO) tensor 4D.
+ *
+ * @param[in] name Tensor name. The tensor name is the prefix of the tensor components
+ * @param[in] type Tensor type (BUFFER or IMAGE)
+ */
+#define TENSOR4D_RO_T(name, type) TENSOR4D_RO_T_STR(name, type)
+
+#define TENSOR4D_WO_T_IMAGE(name) \
+ __write_only image2d_t name##_img, \
+ TENSOR4D_T_BUFFER(name)
+
+#define TENSOR4D_WO_T_BUFFER(name) TENSOR4D_T_BUFFER(name)
+
+#define TENSOR4D_WO_T_STR(name, type) TENSOR4D_WO_T_##type(name)
+
+/** Write-Only (WO) tensor 4D.
+ *
+ * @param[in] name Tensor name. The tensor name is the prefix of the tensor components
+ * @param[in] type Tensor type (BUFFER or IMAGE)
+ */
+#define TENSOR4D_WO_T(name, type) TENSOR4D_WO_T_STR(name, type)
+
#define TENSOR3D_T_IMAGE(name) \
__read_only image2d_t name##_img, \
__global uchar *name##_ptr, \
@@ -457,6 +493,25 @@
(0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y) * (STRIDE_Y)))
#define V_LOAD_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y) READ_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y))
+/** Store a vector in global memory (tensor)
+ *
+ * @param[in] DATA_TYPE Data type
+ * @param[in] WIDTH Number of dst columns
+ * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image).
+ * In case of cl_image, only WIDTH multiples of 4 are supported (4, 8, 16)
+ * @param[in] TENSOR Tensor basename
+ * @param[in] X Starting X position
+ * @param[in] Y Starting Y position
+ * @param[in] STRIDE_Y Stride Y (in bytes)
+ * @param[in] VALUES Values to store in memory
+ */
+#define V_STORE(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES) V_STORE_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES)
+#define V_STORE_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y, VALUES) V_STORE_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES)
+#define V_STORE_BUFFER(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES) \
+ VSTORE(WIDTH) \
+ (VALUES, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y) * (STRIDE_Y)))
+#define V_STORE_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y, VALUES) WRITE_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y), VALUES)
+
/** Load a tile from global memory (tensor)
*
* @param[in] DATA_TYPE Data type
@@ -658,7 +713,8 @@
* @param[in] DATA_TYPE Data type
* @param[in] TILE_AREA Number of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension
* @param[in] TILE_CHANNELS Number of elements to load from C (channel) dimension
- * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently BUFFER only is supported
+ * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image).
+ * When TENSOR_TYPE=IMAGE, the if condition for the out-of-bound check can be skipped
* In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
* @param[in] TENSOR Tensor basename
* @param[in] C Starting C index
@@ -667,15 +723,25 @@
* 16 is the maximum indirect buffer size.
* @param[out] dst Output tile
*/
-#define T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) \
- ({ \
- LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
- { \
- if(yi[0].s[_i] >= 0) \
- { \
- dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \
- } \
- }) \
+#define T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) T_LOAD2D_INDIRECT_STR(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)
+#define T_LOAD2D_INDIRECT_STR(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) T_LOAD2D_INDIRECT_##TENSOR_TYPE(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst)
+#define T_LOAD2D_INDIRECT_BUFFER(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) \
+ ({ \
+ LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
+ { \
+ if(yi[0].s[_i] >= 0) \
+ { \
+ dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \
+ } \
+ }) \
+ })
+
+#define T_LOAD2D_INDIRECT_IMAGE(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) \
+ ({ \
+ LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
+ { \
+ dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \
+ }) \
})
/** Load a tile from global memory (tensor) when the tensor is stored using a NDHWC layout using indirect X, Y and Z coordinates
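To tie the new tile_helpers.h macros together, a self-contained kernel sketch (OpenCL C) that loads a tile through the IMAGE variant of T_LOAD2D_INDIRECT (which drops the per-row bound check, since out-of-range image reads return well-defined values) and stores one row through V_STORE. The kernel, tile sizes and coordinates are placeholders, not a real convolution.

#include "helpers.h"
#include "tile_helpers.h"

#define EX_M0 4 // rows per work-item (placeholder)
#define EX_N0 4 // channels per load; must be a multiple of 4 for IMAGE

// Sketch only: demonstrates the TENSOR_TYPE dispatch added by this patch.
__kernel void t_load2d_indirect_example(
    TENSOR4D_RO_T(src, IMAGE),
    TENSOR4D_WO_T(dst, IMAGE))
{
    TILE(int, 1, EX_M0, yi);       // indirect Y coordinates
    TILE(float, EX_M0, EX_N0, in); // destination tile

    LOOP_UNROLLING(int, _x, 0, 1, EX_M0,
    {
        yi[0].s[_x] = _x; // placeholder row indices
    })

    // IMAGE variant: no if(yi >= 0) guard is emitted, unlike the BUFFER variant.
    T_LOAD2D_INDIRECT(float, EX_M0, EX_N0, IMAGE, src, 0, src_stride_y, yi, in);

    // V_STORE dispatches to WRITE_IMAGE2D for IMAGE tensors.
    V_STORE(float, EX_N0, IMAGE, dst, 0, 0, dst_stride_y, in[0].v);
}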
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index cded31936c..2d21a6eff0 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -377,7 +377,7 @@ void CLDepthwiseConvolutionLayerNativeKernel::run(const Window &window, cl::Comm
const size_t image_h = _input->info()->dimension(1) * _input->info()->dimension(2) * _input->info()->dimension(3);
const TensorShape shape2d(image_w, image_h);
const size_t image_row_pitch = _input->info()->strides_in_bytes()[1];
- input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _input->cl_buffer(), shape2d, _input->info()->data_type(), image_row_pitch);
+ input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _input->cl_buffer(), shape2d, _input->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
if(_export_weights_to_cl_image)
@@ -386,7 +386,8 @@ void CLDepthwiseConvolutionLayerNativeKernel::run(const Window &window, cl::Comm
const size_t image_h = _weights->info()->dimension(1) * _weights->info()->dimension(2) * _weights->info()->dimension(3);
const TensorShape shape2d(image_w, image_h);
const size_t image_row_pitch = _weights->info()->strides_in_bytes()[1];
- weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _weights->cl_buffer(), shape2d, _weights->info()->data_type(), image_row_pitch);
+ weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _weights->cl_buffer(), shape2d, _weights->info()->data_type(), image_row_pitch,
+ CLImage2DType::ReadOnly);
}
}
@@ -401,7 +402,7 @@ void CLDepthwiseConvolutionLayerNativeKernel::run(const Window &window, cl::Comm
{
_kernel.setArg(idx++, weights_cl_image);
}
- add_4D_tensor_argument(idx, _weights, slice);
+ add_4d_tensor_nhwc_argument(idx, _weights);
if(_is_quantized)
{
add_1D_tensor_argument(idx, _output_multipliers, slice);
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
index 93fbdfed63..022d4685fe 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -89,7 +89,7 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
{
const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch);
+ cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
break;
@@ -105,7 +105,7 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
{
const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch);
+ cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
@@ -135,7 +135,7 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(),
- TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y);
+ TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index d9271e24d9..b66163c805 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,6 +64,9 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_input_to_cl_image == true, "Export to CLImage is not supported for the input tensor");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_output_to_cl_image == true, "Export to CLImage is not supported for the output tensor");
+
if(data_layout == DataLayout::NCHW)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != weights->dimension(height_idx), "Weights should have same width and height");
@@ -210,14 +213,26 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
const unsigned int pad_left = conv_info.pad_left();
const unsigned int pad_top = conv_info.pad_top();
- _export_to_cl_image = desc.export_weights_to_cl_image;
+ _export_weights_to_cl_image = desc.export_weights_to_cl_image;
+ _export_input_to_cl_image = desc.export_input_to_cl_image;
+ _export_output_to_cl_image = desc.export_output_to_cl_image;
// Update the padding for the weights tensor if we can export to cl_image
- if(_export_to_cl_image)
+ if(_export_weights_to_cl_image)
{
gemm::update_padding_for_cl_image(weights);
}
+ if(_export_output_to_cl_image)
+ {
+ gemm::update_padding_for_cl_image(dst);
+ }
+
+ if(_export_input_to_cl_image)
+ {
+ gemm::update_padding_for_cl_image(src);
+ }
+
if(biases != nullptr)
{
build_options.add_option(std::string("-DHAS_BIAS"));
@@ -241,7 +256,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
build_options.add_option("-cl-fast-relaxed-math");
}
- build_options.add_option("-DSRC_TENSOR_TYPE=BUFFER");
+ build_options.add_option_if_else(_export_input_to_cl_image, "-DSRC_TENSOR_TYPE=IMAGE", "-DSRC_TENSOR_TYPE=BUFFER");
build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(0)));
build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(1)));
@@ -249,9 +264,9 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
build_options.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst->dimension(0)));
build_options.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst->dimension(1)));
build_options.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(2)));
- build_options.add_option("-DDST_TENSOR_TYPE=BUFFER");
+ build_options.add_option_if_else(_export_output_to_cl_image, "-DDST_TENSOR_TYPE=IMAGE", "-DDST_TENSOR_TYPE=BUFFER");
build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type));
- build_options.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
+ build_options.add_option_if_else(_export_weights_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type()));
@@ -307,7 +322,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
}
else
{
- _export_to_cl_image = false;
+ _export_weights_to_cl_image = false;
kernel_name << "direct_convolution_nchw";
build_options.add_option_if(biases != nullptr, std::string("-DHAS_BIAS"));
@@ -399,8 +414,10 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl
if(_data_layout == DataLayout::NHWC)
{
cl::Image2D weights_cl_image;
+ cl::Image2D output_cl_image;
+ cl::Image2D input_cl_image;
- if(_export_to_cl_image)
+ if(_export_weights_to_cl_image)
{
const size_t image_w = weights->info()->dimension(0) / 4;
const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3);
@@ -408,13 +425,43 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl
const size_t image_row_pitch = weights->info()->strides_in_bytes()[1];
// Export cl_buffer to cl_image
- weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch);
+ weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ }
+
+ if(_export_output_to_cl_image)
+ {
+ const size_t image_w = dst->info()->dimension(0) / 4;
+ const size_t image_h = dst->info()->dimension(1) * dst->info()->dimension(2) * dst->info()->dimension(3);
+ const TensorShape shape2d(image_w, image_h);
+ const size_t image_row_pitch = dst->info()->strides_in_bytes()[1];
+
+ // Export cl_buffer to cl_image
+ output_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), dst->cl_buffer(), shape2d, dst->info()->data_type(), image_row_pitch, CLImage2DType::WriteOnly);
+ }
+
+ if(_export_input_to_cl_image)
+ {
+ const size_t image_w = src->info()->dimension(0) / 4;
+ const size_t image_h = src->info()->dimension(1) * src->info()->dimension(2) * src->info()->dimension(3);
+ const TensorShape shape2d(image_w, image_h);
+ const size_t image_row_pitch = src->info()->strides_in_bytes()[1];
+
+ // Export cl_buffer to cl_image
+ input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), src->cl_buffer(), shape2d, src->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
unsigned int idx = 0;
+ if(_export_input_to_cl_image)
+ {
+ _kernel.setArg(idx++, input_cl_image);
+ }
add_4d_tensor_nhwc_argument(idx, src);
+ if(_export_output_to_cl_image)
+ {
+ _kernel.setArg(idx++, output_cl_image);
+ }
add_4d_tensor_nhwc_argument(idx, dst);
- if(_export_to_cl_image)
+ if(_export_weights_to_cl_image)
{
_kernel.setArg(idx++, weights_cl_image);
}
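Summarising the run_op() changes from the host side: a condensed sketch (C++) of the kernel-argument ordering for the NHWC direct convolution after this patch. Each optional image object must be set immediately before the matching buffer components, and only when the corresponding -D*_TENSOR_TYPE=IMAGE build option was used, otherwise the argument indices no longer line up. The helper set_image_args_sketch() is hypothetical; in the library the buffer components are appended by add_4d_tensor_nhwc_argument() at the positions marked below.

// Hypothetical helper, for illustration only; not a drop-in replacement for run_op().
#include "arm_compute/core/CL/OpenCL.h"

void set_image_args_sketch(cl::Kernel &kernel, unsigned int &idx,
                           const cl::Image2D *src_img,  // non-null when export_input_to_cl_image
                           const cl::Image2D *dst_img,  // non-null when export_output_to_cl_image
                           const cl::Image2D *wei_img)  // non-null when export_weights_to_cl_image
{
    if(src_img != nullptr)
    {
        kernel.setArg(idx++, *src_img); // __read_only image2d_t src_img
    }
    // ... add_4d_tensor_nhwc_argument(idx, src): src buffer strides, sizes and offset ...

    if(dst_img != nullptr)
    {
        kernel.setArg(idx++, *dst_img); // __write_only image2d_t dst_img
    }
    // ... add_4d_tensor_nhwc_argument(idx, dst): dst buffer strides, sizes and offset ...

    if(wei_img != nullptr)
    {
        kernel.setArg(idx++, *wei_img); // __read_only image2d_t wei_img
    }
    // ... add_4d_tensor_nhwc_argument(idx, weights), then the optional bias vector ...
}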
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/gpu/cl/kernels/ClDirectConv2dKernel.h
index 0cb8aebbe1..25171a0536 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.h
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -84,7 +84,9 @@ public:
public:
DataLayout _data_layout{};
PadStrideInfo _conv_info{};
- bool _export_to_cl_image{ false };
+ bool _export_weights_to_cl_image{ false };
+ bool _export_output_to_cl_image{ false };
+ bool _export_input_to_cl_image{ false };
};
} // namespace kernels
} // namespace opencl
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
index 6a450b652b..f74a5d87af 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -360,7 +360,7 @@ void ClGemmMatrixMultiplyReshapedKernel::run_op(ITensorPack &tensors, const Wind
const TensorShape shape2d(src1->info()->dimension(0) / 4, src1->info()->dimension(1) * src1->info()->dimension(2));
const size_t image_row_pitch = src1->info()->strides_in_bytes()[1];
- src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch);
+ src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
do
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
index a8bcf8d6a1..efd0a95eab 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -375,7 +375,7 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::run_op(ITensorPack &tensors, con
const TensorShape shape2d(src1->info()->dimension(0) / 4, src1->info()->dimension(1) * src1->info()->dimension(2));
const size_t image_row_pitch = src1->info()->strides_in_bytes()[1];
- src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch);
+ src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
do
diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp
index fe46913517..f252afb06a 100644
--- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -325,7 +325,7 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel::run_op(ITensorPack &tensors,
const TensorShape shape2d(src1->info()->dimension(0) / 4, src1->info()->dimension(1) * src1->info()->dimension(2));
const size_t image_row_pitch = src1->info()->strides_in_bytes()[1];
- src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch);
+ src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
Window slice = window.first_slice_window_3D();
diff --git a/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp
index 3448377cb5..b8915cc406 100644
--- a/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -282,7 +282,7 @@ void ClIndirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window,
const size_t image_row_pitch = weights->info()->strides_in_bytes()[1];
// Export cl_buffer to cl_image
- weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch);
+ weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
unsigned int idx = 0;