From ad9a7ed2f9969381af0b9c97438a3402e16d9483 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 16 Sep 2022 14:14:21 +0100 Subject: Rework DepthwiseConvolution heuristic on OpenCL Resolves COMPMID-5632 Change-Id: I2bdbe69a610ca2510fbd74d5d412842679299762 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8365 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Reviewed-by: Jakub Sujak Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl') diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl index 8b14b27643..8a8458798e 100644 --- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl +++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl @@ -145,7 +145,7 @@ __kernel void dwc_native_fp_nhwc( }) // Load tile from the src tensor (TILE A) - T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, _IM0_A, _IN0_A, SRC_TENSOR_TYPE, src, bout, yi + yk * DILATION_Y, xi, (cout / DEPTH_MULTIPLIER), src_w, src_h, DILATION_X, 1, _IBOUNDARY_CHECK, a); + T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, _IM0_A, _IN0_A, SRC_TENSOR_TYPE, src, bout, yi + yk * DILATION_Y, xi, (cout / DEPTH_MULTIPLIER), SRC_WIDTH, SRC_HEIGHT, DILATION_X, 1, _IBOUNDARY_CHECK, a); TILE(WEI_DATA_TYPE, _IM0_B, _IN0_B, b); @@ -185,7 +185,7 @@ __kernel void dwc_native_fp_nhwc( { LOOP_UNROLLING(int, m0, 0, 1, M0, { - int xi_out = min(xo + M0 - 1 - m0, (int)(dst_w) - 1); + int xi_out = min(xo + M0 - 1 - m0, (int)(DST_WIDTH) - 1); VSTORE_PARTIAL(N0, PARTIAL_N0) (c[M0 - 1 - m0].v, 0, (__global DST_DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + cout * sizeof(DST_DATA_TYPE) + (uint)xi_out * dst_stride_y + (uint)yo * dst_stride_z + (uint)bout * dst_stride_w)); }) @@ -194,7 +194,7 @@ __kernel void dwc_native_fp_nhwc( { LOOP_UNROLLING(int, m0, 0, 1, M0, { - int xi_out = min(xo + M0 - 1 - m0, (int)(dst_w) - 1); + int xi_out = min(xo + M0 - 1 - m0, (int)(DST_WIDTH) - 1); VSTORE(N0) (c[M0 - 1 - m0].v, 0, (__global DST_DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + cout * sizeof(DST_DATA_TYPE) + (uint)xi_out * dst_stride_y + (uint)yo * dst_stride_z + (uint)bout * dst_stride_w)); }) -- cgit v1.2.1