aboutsummaryrefslogtreecommitdiff
path: root/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp')
-rw-r--r-- src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp 32
1 file changed, 31 insertions, 1 deletion
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
index bbdf8df0a3..34b735edc9 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
@@ -41,7 +41,7 @@ ComponentType ClElementwiseAddKernelComponent::get_component_type() const
std::set<std::string> ClElementwiseAddKernelComponent::get_headers_list() const
{
- return std::set<std::string> { "gemm_helpers.h", "repeat.h" };
+ return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "gemm_helpers.h", "repeat.h", "tile_helpers.h" };
}
Window ClElementwiseAddKernelComponent::get_window() const
@@ -78,6 +78,36 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, addend, addend_addr, 0, {{addend}}_stride_y, g_zero, PARTIAL_LOAD_M0, PARTIAL_LOAD_N0, PARTIAL_COND_Y, PARTIAL_COND_X); \
MIXED_PRECISION_ELTWISE_OP_BLOCK(ADD_X_POS_0, M0, N0, {{acc}}, addend, DATA_TYPE_ACCUMULATOR, addend_hp);
}
+
+ // Workaround for the discrepancy between tiles and repeats
+#if defined(IS_TILED)
+ {{acc}}[0].v = {{acc}}0;
+#if M0 >= 2
+ {{acc}}[1].v = {{acc}}1;
+#endif // M0 >= 2
+#if M0 >= 3
+ {{acc}}[2].v = {{acc}}2;
+#endif // M0 >= 3
+#if M0 >= 4
+ {{acc}}[3].v = {{acc}}3;
+#endif // M0 >= 4
+#if M0 >= 8
+ {{acc}}[4].v = {{acc}}4;
+ {{acc}}[5].v = {{acc}}5;
+ {{acc}}[6].v = {{acc}}6;
+ {{acc}}[7].v = {{acc}}7;
+#endif // M0 >= 8
+#if M0 == 16
+ {{acc}}[8].v = {{acc}}8;
+ {{acc}}[9].v = {{acc}}9;
+ {{acc}}[10].v = {{acc}}A;
+ {{acc}}[11].v = {{acc}}B;
+ {{acc}}[12].v = {{acc}}C;
+ {{acc}}[13].v = {{acc}}D;
+ {{acc}}[14].v = {{acc}}E;
+ {{acc}}[15].v = {{acc}}F;
+#endif // M0 == 16
+#endif // defined(IS_TILED)
//------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
)_";