diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2022-03-28 21:32:33 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2022-04-13 10:36:30 +0000 |
commit | 16c5697085c256c19fb8ba4bef6188d61f30a88b (patch) | |
tree | 609bfe2082c939ff37bdf6ef37bc22fc071bd934 /src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp | |
parent | 5d606cccaabdfc435734c9fb51e11f14f3724a23 (diff) | |
download | ComputeLibrary-16c5697085c256c19fb8ba4bef6188d61f30a88b.tar.gz |
Add DirectConvolution2D kernel component for dynamic fusion
Resolves: COMPMID-5156
Change-Id: I438da924cb80d3bce72106b06ca7181e0606bd01
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7399
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp')
-rw-r--r-- | src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp | 32 |
1 files changed, 31 insertions, 1 deletions
```diff
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
index bbdf8df0a3..34b735edc9 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
@@ -41,7 +41,7 @@ ComponentType ClElementwiseAddKernelComponent::get_component_type() const
 
 std::set<std::string> ClElementwiseAddKernelComponent::get_headers_list() const
 {
-    return std::set<std::string> { "gemm_helpers.h", "repeat.h" };
+    return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "gemm_helpers.h", "repeat.h", "tile_helpers.h" };
 }
 
 Window ClElementwiseAddKernelComponent::get_window() const
@@ -78,6 +78,36 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
     LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, addend, addend_addr, 0, {{addend}}_stride_y, g_zero, PARTIAL_LOAD_M0, PARTIAL_LOAD_N0, PARTIAL_COND_Y, PARTIAL_COND_X); \
     MIXED_PRECISION_ELTWISE_OP_BLOCK(ADD_X_POS_0, M0, N0, {{acc}}, addend, DATA_TYPE_ACCUMULATOR, addend_hp);
     }
+
+    // Workaround for the discrepancy between tiles and repeats
+#if defined(IS_TILED)
+    {{acc}}[0].v = {{acc}}0;
+#if M0 >= 2
+    {{acc}}[1].v = {{acc}}1;
+#endif // M0 >= 2
+#if M0 >= 3
+    {{acc}}[2].v = {{acc}}2;
+#endif // M0 >= 3
+#if M0 >= 4
+    {{acc}}[3].v = {{acc}}3;
+#endif // M0 >= 4
+#if M0 >= 8
+    {{acc}}[4].v = {{acc}}4;
+    {{acc}}[5].v = {{acc}}5;
+    {{acc}}[6].v = {{acc}}6;
+    {{acc}}[7].v = {{acc}}7;
+#endif // M0 >= 8
+#if M0 == 16
+    {{acc}}[8].v = {{acc}}8;
+    {{acc}}[9].v = {{acc}}9;
+    {{acc}}[10].v = {{acc}}A;
+    {{acc}}[11].v = {{acc}}B;
+    {{acc}}[12].v = {{acc}}C;
+    {{acc}}[13].v = {{acc}}D;
+    {{acc}}[14].v = {{acc}}E;
+    {{acc}}[15].v = {{acc}}F;
+#endif // M0 == 16
+#endif // defined(IS_TILED)
     //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
 )_";
```