aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
diff options
context:
space:
mode:
author: Gunes Bayir <gunes.bayir@arm.com> 2022-11-21 21:46:50 +0000
committer: Gunes Bayir <gunes.bayir@arm.com> 2022-11-28 15:02:59 +0000
commit: 7dc0234331f2150a6b4ac5c2b49de419870f7cf5 (patch)
tree: 4e514ce8dd98f022fcbde32ca756ddda375cab8c /src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
parent: 5d01681fe9aa8a04bd5431db9b2866b8d538dbae (diff)
download: ComputeLibrary-7dc0234331f2150a6b4ac5c2b49de419870f7cf5.tar.gz
Implement FP32/16 Depthwise Conv2d operator in dynamic fusion
This patch adds the Depthwise Conv2d operator to the dynamic fusion interface and adds the associated tests.

Resolves: COMPMID-5517
Change-Id: I385c94dff7fd40c72b8337ef797e508df4499a82
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8678
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp')
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp 23
1 file changed, 3 insertions, 20 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
index 6c4b8f52f2..bffb467ebb 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
@@ -46,31 +46,14 @@ std::string ClTemplateStore::get_name() const
std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
+
return R"_(
//------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
{
-// This also follows NHWC layout
-// g_ind_0 maps to global_id(0) maps to Channel
-// g_ind_1 maps to global_id(1) maps to Height and Weight (Collapsed Window)
-// g_ind_2 maps to global_id(2) maps to N / Batch
-#define _IDST_WIDTH {{dst}}_w
-#define _IDST_HEIGHT {{dst}}_h
- TILE(uint, M0, 1, dst_indirect_y);
-
- // Calculate the destination indirect Y
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1);
- dst_indirect_y[i].v += g_ind_2 * (int)(_IDST_WIDTH * _IDST_HEIGHT);
- })
-
bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
- T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y);
-
-#undef _IDST_WIDTH
-#undef _IDST_HEIGHT
- //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
+ T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, g_dst_indirect_y);
+//------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
}
)_";