about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-09-16 14:14:21 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-10-06 15:34:16 +0000
commit: ad9a7ed2f9969381af0b9c97438a3402e16d9483 (patch)
tree: 440ef7484418b49778e897bf00fb6396c24d0986 /src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
parent: 3bedd2f031680f53e2982638adfe99a29dca8d06 (diff)
download: ComputeLibrary-ad9a7ed2f9969381af0b9c97438a3402e16d9483.tar.gz
Rework DepthwiseConvolution heuristic on OpenCL
Resolves COMPMID-5632

Change-Id: I2bdbe69a610ca2510fbd74d5d412842679299762
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8365
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl')
-rw-r--r-- src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl | 6
1 file changed, 3 insertions, 3 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
index 8b14b27643..8a8458798e 100644
--- a/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
+++ b/src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl
@@ -145,7 +145,7 @@ __kernel void dwc_native_fp_nhwc(
})
// Load tile from the src tensor (TILE A)
- T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, _IM0_A, _IN0_A, SRC_TENSOR_TYPE, src, bout, yi + yk * DILATION_Y, xi, (cout / DEPTH_MULTIPLIER), src_w, src_h, DILATION_X, 1, _IBOUNDARY_CHECK, a);
+ T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, _IM0_A, _IN0_A, SRC_TENSOR_TYPE, src, bout, yi + yk * DILATION_Y, xi, (cout / DEPTH_MULTIPLIER), SRC_WIDTH, SRC_HEIGHT, DILATION_X, 1, _IBOUNDARY_CHECK, a);
TILE(WEI_DATA_TYPE, _IM0_B, _IN0_B, b);
@@ -185,7 +185,7 @@ __kernel void dwc_native_fp_nhwc(
{
LOOP_UNROLLING(int, m0, 0, 1, M0,
{
- int xi_out = min(xo + M0 - 1 - m0, (int)(dst_w) - 1);
+ int xi_out = min(xo + M0 - 1 - m0, (int)(DST_WIDTH) - 1);
VSTORE_PARTIAL(N0, PARTIAL_N0)
(c[M0 - 1 - m0].v, 0, (__global DST_DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + cout * sizeof(DST_DATA_TYPE) + (uint)xi_out * dst_stride_y + (uint)yo * dst_stride_z + (uint)bout * dst_stride_w));
})
@@ -194,7 +194,7 @@ __kernel void dwc_native_fp_nhwc(
{
LOOP_UNROLLING(int, m0, 0, 1, M0,
{
- int xi_out = min(xo + M0 - 1 - m0, (int)(dst_w) - 1);
+ int xi_out = min(xo + M0 - 1 - m0, (int)(DST_WIDTH) - 1);
VSTORE(N0)
(c[M0 - 1 - m0].v, 0, (__global DST_DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + cout * sizeof(DST_DATA_TYPE) + (uint)xi_out * dst_stride_y + (uint)yo * dst_stride_z + (uint)bout * dst_stride_w));
})