aboutsummaryrefslogtreecommitdiff
path: root/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
diff options
context:
space:
mode:
author: Gunes Bayir <gunes.bayir@arm.com> 2022-03-28 21:32:33 +0100
committer: SiCong Li <sicong.li@arm.com> 2022-04-13 10:36:30 +0000
commit: 16c5697085c256c19fb8ba4bef6188d61f30a88b (patch)
tree: 609bfe2082c939ff37bdf6ef37bc22fc071bd934 /src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
parent: 5d606cccaabdfc435734c9fb51e11f14f3724a23 (diff)
download: ComputeLibrary-16c5697085c256c19fb8ba4bef6188d61f30a88b.tar.gz
Add DirectConvolution2D kernel component for dynamic fusion
Resolves: COMPMID-5156
Change-Id: I438da924cb80d3bce72106b06ca7181e0606bd01
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7399
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp')
-rw-r--r-- src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp | 32
1 files changed, 31 insertions, 1 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
index bbdf8df0a3..34b735edc9 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
@@ -41,7 +41,7 @@ ComponentType ClElementwiseAddKernelComponent::get_component_type() const
std::set<std::string> ClElementwiseAddKernelComponent::get_headers_list() const
{
- return std::set<std::string> { "gemm_helpers.h", "repeat.h" };
+ return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "gemm_helpers.h", "repeat.h", "tile_helpers.h" };
}
Window ClElementwiseAddKernelComponent::get_window() const
@@ -78,6 +78,36 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, addend, addend_addr, 0, {{addend}}_stride_y, g_zero, PARTIAL_LOAD_M0, PARTIAL_LOAD_N0, PARTIAL_COND_Y, PARTIAL_COND_X); \
MIXED_PRECISION_ELTWISE_OP_BLOCK(ADD_X_POS_0, M0, N0, {{acc}}, addend, DATA_TYPE_ACCUMULATOR, addend_hp);
}
+
+ // Workaround for the discrepancy between tiles and repeats
+#if defined(IS_TILED)
+ {{acc}}[0].v = {{acc}}0;
+#if M0 >= 2
+ {{acc}}[1].v = {{acc}}1;
+#endif // M0 >= 2
+#if M0 >= 3
+ {{acc}}[2].v = {{acc}}2;
+#endif // M0 >= 3
+#if M0 >= 4
+ {{acc}}[3].v = {{acc}}3;
+#endif // M0 >= 4
+#if M0 >= 8
+ {{acc}}[4].v = {{acc}}4;
+ {{acc}}[5].v = {{acc}}5;
+ {{acc}}[6].v = {{acc}}6;
+ {{acc}}[7].v = {{acc}}7;
+#endif // M0 >= 8
+#if M0 == 16
+ {{acc}}[8].v = {{acc}}8;
+ {{acc}}[9].v = {{acc}}9;
+ {{acc}}[10].v = {{acc}}A;
+ {{acc}}[11].v = {{acc}}B;
+ {{acc}}[12].v = {{acc}}C;
+ {{acc}}[13].v = {{acc}}D;
+ {{acc}}[14].v = {{acc}}E;
+ {{acc}}[15].v = {{acc}}F;
+#endif // M0 == 16
+#endif // defined(IS_TILED)
//------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
)_";