From 1e9e887bb9ffe75619e1b497abff78bb5f1055f9 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 6 May 2022 17:06:21 +0100 Subject: Mismatches in dynamically fused direct conv2d + add kernel Resolves: COMPMID-5269 Change-Id: I4372ea4365d14ead79153e4b08b690a1e20ab0b7 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7531 Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/tile_helpers.h | 20 ++++++++++++++++++++ .../dynamic_fusion/ClKernelBuildingImpl/Common.h | 4 +--- .../components/ClElementwiseAddKernelComponent.cpp | 18 ++++++++++++++---- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h index eab06aa50d..ec57022f63 100644 --- a/src/core/CL/cl_kernels/tile_helpers.h +++ b/src/core/CL/cl_kernels/tile_helpers.h @@ -1030,6 +1030,26 @@ }) \ }) +/** Element-wise addition between two tiles (LHS and RHS) + * + * @note Performs: LHS + RHS = DST + * @note Both tiles must have same data type + * + * @param[in] DST_DATA_TYPE DST data type + * @param[in] M0 Number of LHS rows + * @param[in] N0 Number of LHS columns + * @param[in] lhs LHS tile + * @param[in] rhs RHS tile + * @param[out] dst DST tile + */ +#define T_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \ + ({ \ + LOOP_UNROLLING(int, _m0, 0, 1, M0, \ + { \ + dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \ + }) \ + }) + /** Matrix multiplication * * @note Performs: LHS X RHS + DST = DST diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h index 17437c285d..9923b7ab90 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h @@ -857,15 +857,13 @@ private: { auto dst_info = get_kernel_argument_info(_dst_id); auto dst_w = dst_info->dimension(0); - auto dst_h = dst_info->dimension(1); const auto tile_w = std::max(1, get_execution_window().x().step()); const auto tile_h = std::max(1, get_execution_window().y().step()); auto leftover_w = dst_w % tile_w; - auto leftover_h = dst_h % tile_h; std::string code = ""; code += std::string(" int cout = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n"; - code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + std::to_string(leftover_h) + ");\n"; + code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n"; code += std::string(" int bout = GET_SPATIAL_IDX(2, 1, 0);\n\n"); switch(_tile_info.clipping) diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp index 2bbea8725d..47f95b5c40 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp @@ -88,7 +88,12 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile); T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile); +#if defined(IS_BROADCAST) T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); +#else // !defined(IS_BROADCAST) + T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); +#endif // defined(IS_BROADCAST) + } //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- )_"; @@ -106,7 +111,11 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile); +#if defined(IS_BROADCAST) T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); +#else // !defined(IS_BROADCAST) + T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); +#endif // defined(IS_BROADCAST) } //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD --------------------- )_"; @@ -115,16 +124,17 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const { + const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); CLBuildOptions build_opts{}; - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const auto partial_m0 = t_dst_info->dimension(1) % m0; + const auto n0 = _blueprint->impl().get_execution_window().x().step(); + const auto m0 = _blueprint->impl().get_execution_window().y().step(); + const bool is_broadcast = t_src_info->tensor_shape() != t_dst_info->tensor_shape(); build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_m0)); + build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST"); return build_opts; } -- cgit v1.2.1