From 1f8db2be160718979d38e3671a135d22e83cc5c2 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Mon, 10 Dec 2018 16:19:20 +0000 Subject: COMPMID-1786 Dispatch a single OpenCL when running CLScaleKernel with NHWC with batch_size!=1 Change-Id: Ib5ea76c1ba7a7add1f050ca9168091bd30749725 Reviewed-on: https://review.mlplatform.org/377 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Gian Marco Iodice --- src/core/CL/cl_kernels/scale_quantized.cl | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'src/core/CL/cl_kernels/scale_quantized.cl') diff --git a/src/core/CL/cl_kernels/scale_quantized.cl b/src/core/CL/cl_kernels/scale_quantized.cl index 3211e7efa1..86dbf608f4 100644 --- a/src/core/CL/cl_kernels/scale_quantized.cl +++ b/src/core/CL/cl_kernels/scale_quantized.cl @@ -85,12 +85,14 @@ __kernel void scale_bilinear_quantized_nchw( vstore4(bilinear_interpolate_with_border_quantized(&in, tc, input_width, input_height, BORDER_SIZE, SCALE, OFFSET), 0, (__global DATA_TYPE *)out.ptr); } +#if defined(DEPTH_OUT) /** Performs scale on an image interpolating with the BILINEAR method. (NHWC) * * @note Sampling policy to be used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT * @note Scale value for QASYMM8 data type to used is passed as -DSCALE= e.g. -DSCALE=0.5 * @note Offset value for QASYMM8 data type to used is passed as -DOFFSET= e.g. -DOFFSET=1 * @note If border mode replicate is used, is should be passed as -DBORDER_MODE_REPLICATE + * @note Output tensor's depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 * * @param[in] in_ptr Pointer to the source image. Supported data types: QASYMM8. 
* @param[in] in_stride_x Stride of the source image in X dimension (in bytes) @@ -114,22 +116,22 @@ __kernel void scale_bilinear_quantized_nchw( * @param[in] scale_y The scale factor along y dimension */ __kernel void scale_bilinear_quantized_nhwc( - TENSOR3D_DECLARATION(in), - TENSOR3D_DECLARATION(out), + TENSOR4D_DECLARATION(in), + TENSOR4D_DECLARATION(out), const float input_width, const float input_height, const float scale_x, const float scale_y) { - Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(in); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); + Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0); + Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT); #ifdef SAMPLING_POLICY_TOP_LEFT const float new_x = get_global_id(1) * scale_x; - const float new_y = get_global_id(2) * scale_y; + const float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y; #elif SAMPLING_POLICY_CENTER const float new_x = (get_global_id(1) + 0.5f) * scale_x - 0.5f; - const float new_y = (get_global_id(2) + 0.5f) * scale_y - 0.5f; + const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y - 0.5f; #else /* SAMPLING_POLICY */ #error("Unsupported sampling policy"); #endif /* SAMPLING_POLICY */ @@ -150,10 +152,10 @@ __kernel void scale_bilinear_quantized_nhwc( clamped_x1_ = select(clamped_x1_, 0.0f - BORDER_SIZE, new_xf + 1 < 0.f || new_xf + 1 > input_width - 1 || new_yf < 0.f || new_yf > input_height - 1); #endif /* BORDER_MODE_REPLICATE */ - int4 ins = (int4)(*((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y))), - *((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x1_), convert_int(clamped_y))), - *((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x_), convert_int(clamped_y1))), - *((__global DATA_TYPE *)tensor3D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1)))); + int4 ins = (int4)(*((__global DATA_TYPE 
*)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))), + *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1_), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))), + *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x_), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))), + *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT)))); const float a = new_x - new_xf; const float b = 1.f - a; @@ -167,3 +169,4 @@ __kernel void scale_bilinear_quantized_nhwc( *((__global DATA_TYPE *)out.ptr) = res; } +#endif /* defined(DEPTH_OUT) */ \ No newline at end of file -- cgit v1.2.1