From e2e6d745c940cdfd8c3340fd1227dbef1badfb3c Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 1 Mar 2023 15:46:10 +0000 Subject: Fix direct conv2d in dynamic fusion * Put input and output tensor shape value directly to the CL code. * Use texture for weights when it is possible. Resolves: COMPMID-5938 Signed-off-by: Viet-Hoa Do Change-Id: Ib53b310a80ce857eac36564b352136fdde55b131 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9249 Reviewed-by: SiCong Li Reviewed-by: Jakub Sujak Benchmark: Arm Jenkins Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- .../template_writer/cl/ClTemplateDirectConv2d.cpp | 26 +++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp') diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp index e69103e263..ca531fe28e 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp @@ -91,12 +91,12 @@ TILE(uint, M0, 1, g_dst_indirect_y); { #define _IWEI_WIDTH {{WEI_WIDTH}} #define _IWEI_HEIGHT {{WEI_HEIGHT}} -#define _ISRC_WIDTH {{src}}_w -#define _ISRC_HEIGHT {{src}}_h -#define _ISRC_CHANNELS {{src}}_c -#define _IDST_WIDTH {{arg_dst}}_w -#define _IDST_HEIGHT {{arg_dst}}_h -#define _IDST_CHANNELS {{arg_dst}}_c +#define _ISRC_WIDTH {{SRC_WIDTH}} +#define _ISRC_HEIGHT {{SRC_HEIGHT}} +#define _ISRC_CHANNELS {{SRC_CHANNELS}} +#define _IDST_WIDTH {{DST_WIDTH}} +#define _IDST_HEIGHT {{DST_HEIGHT}} +#define _IDST_CHANNELS {{DST_CHANNELS}} #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT) TILE(int, M0, 1, xi); @@ -214,8 +214,8 @@ code += R"_( code += R"_( LOOP_UNROLLING(int, i, 0, 1, M0, { - g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1); - g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h); + g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1); + g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}}); }) } //------------------ END KERNEL {{meta_kernel_id}} --------------------- @@ -294,9 +294,19 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, } const auto width_idx = 1; const auto height_idx = 2; + const auto channel_idx = 0; + + lut["SRC_WIDTH"] = _src->dimension(width_idx); + lut["SRC_HEIGHT"] = _src->dimension(height_idx); + lut["SRC_CHANNELS"] = _src->dimension(channel_idx); + lut["WEI_WIDTH"] = _weight->dimension(width_idx); lut["WEI_HEIGHT"] = _weight->dimension(height_idx); + lut["DST_WIDTH"] = _dst->dimension(width_idx); + lut["DST_HEIGHT"] = _dst->dimension(height_idx); + lut["DST_CHANNELS"] = _dst->dimension(channel_idx); + lut["STRIDE_X"] = _attributes.stride().x(); lut["STRIDE_Y"] = _attributes.stride().y(); -- cgit v1.2.1