From a3478a5b0c6fc62a0a2b5cef17422cab67e4195a Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 17 Jan 2020 16:36:46 +0000 Subject: COMPMID-2819 Unary Operation VTS/CTS failures on CL Change-Id: I49231bb66101244d05d6eb35bc644bcc8693aa34 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/2602 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Georgios Pinitas --- src/core/CL/cl_kernels/elementwise_unary.cl | 24 ++++++++++++++-------- .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 13 ++++++------ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/core/CL/cl_kernels/elementwise_unary.cl b/src/core/CL/cl_kernels/elementwise_unary.cl index b496fcf562..e8a3fb778a 100644 --- a/src/core/CL/cl_kernels/elementwise_unary.cl +++ b/src/core/CL/cl_kernels/elementwise_unary.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,20 +48,28 @@ /** Applies element wise unary operator in a tensor. * * @param[in] in_ptr Pointer to the source image. Supported data types: F16/32. 
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] in_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] in_step_x in_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] in_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] in_step_y in_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] in_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] in_step_z in_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image * @param[out] out_ptr Pointer to the destination image. Supported data types: F16/32. * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] out_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image */ __kernel void elementwise_unary( - VECTOR_DECLARATION(in), - VECTOR_DECLARATION(out)) + TENSOR3D_DECLARATION(in), + TENSOR3D_DECLARATION(out)) { - Vector in = CONVERT_TO_VECTOR_STRUCT(in); - Vector out = CONVERT_TO_VECTOR_STRUCT(out); + Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(in); + Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); 
#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X) // Check if access on width gets out of bounds diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index c4ab50411a..543c8f325c 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -121,14 +121,15 @@ void CLElementWiseUnaryLayerKernel::run(const Window &window, cl::CommandQueue & ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimX); + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + Window slice = collapsed.first_slice_window_3D(); do { unsigned int idx = 0; - add_1D_tensor_argument(idx, _input, collapsed); - add_1D_tensor_argument(idx, _output, collapsed); - enqueue(queue, *this, collapsed, lws_hint()); + add_3D_tensor_argument(idx, _input, slice); + add_3D_tensor_argument(idx, _output, slice); + enqueue(queue, *this, slice, lws_hint()); } - while(window.slide_window_slice_1D(collapsed)); + while(collapsed.slide_window_slice_3D(slice)); } -- cgit v1.2.1