diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp | 23 |
1 files changed, 3 insertions, 20 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp index 6c4b8f52f2..bffb467ebb 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp @@ -46,31 +46,14 @@ std::string ClTemplateStore::get_name() const std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const { ARM_COMPUTE_UNUSED(comp_group); + return R"_( //------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- { -// This also follows NHWC layout -// g_ind_0 maps to global_id(0) maps to Channel -// g_ind_1 maps to global_id(1) maps to Height and Weight (Collapsed Window) -// g_ind_2 maps to global_id(2) maps to N / Batch -#define _IDST_WIDTH {{dst}}_w -#define _IDST_HEIGHT {{dst}}_h - TILE(uint, M0, 1, dst_indirect_y); - - // Calculate the destination indirect Y - LOOP_UNROLLING(int, i, 0, 1, M0, - { - dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1); - dst_indirect_y[i].v += g_ind_2 * (int)(_IDST_WIDTH * _IDST_HEIGHT); - }) - bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0; - T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y); - -#undef _IDST_WIDTH -#undef _IDST_HEIGHT - //------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- + T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, g_dst_indirect_y); +//------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- } )_"; |