aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2022-05-06 17:06:21 +0100
committer Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> 2022-05-10 09:41:10 +0100
commit 1e9e887bb9ffe75619e1b497abff78bb5f1055f9 (patch)
tree b95974a8f145a4dbe6a2067bf5ed739ed7bf8867
parent b63b1196adea8b07dd8db77c2492a212650deba0 (diff)
download ComputeLibrary-1e9e887bb9ffe75619e1b497abff78bb5f1055f9.tar.gz
Mismatches in dynamically fused direct conv2d + add kernel
Resolves: COMPMID-5269
Change-Id: I4372ea4365d14ead79153e4b08b690a1e20ab0b7
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7531
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/tile_helpers.h 20
-rw-r--r-- src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h 4
-rw-r--r-- src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp 18
3 files changed, 35 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index eab06aa50d..ec57022f63 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -1030,6 +1030,26 @@
}) \
})
+/** Element-wise addition between two tiles (LHS and RHS)
+ *
+ * @note Performs: LHS + RHS = DST
+ * @note Both tiles must have same data type
+ *
+ * @param[in] DST_DATA_TYPE DST data type
+ * @param[in] M0 Number of LHS rows
+ * @param[in] N0 Number of LHS columns
+ * @param[in] lhs LHS tile
+ * @param[in] rhs RHS tile
+ * @param[out] dst DST tile
+ */
+#define T_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) \
+ ({ \
+ LOOP_UNROLLING(int, _m0, 0, 1, M0, \
+ { \
+ dst[_m0].v = CONVERT(lhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)) + CONVERT(rhs[_m0].v, VEC_DATA_TYPE(DST_DATA_TYPE, N0)); \
+ }) \
+ })
+
/** Matrix multiplication
*
* @note Performs: LHS X RHS + DST = DST
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
index 17437c285d..9923b7ab90 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
@@ -857,15 +857,13 @@ private:
{
auto dst_info = get_kernel_argument_info(_dst_id);
auto dst_w = dst_info->dimension(0);
- auto dst_h = dst_info->dimension(1);
const auto tile_w = std::max(1, get_execution_window().x().step());
const auto tile_h = std::max(1, get_execution_window().y().step());
auto leftover_w = dst_w % tile_w;
- auto leftover_h = dst_h % tile_h;
std::string code = "";
code += std::string(" int cout = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
- code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + std::to_string(leftover_h) + ");\n";
+ code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
code += std::string(" int bout = GET_SPATIAL_IDX(2, 1, 0);\n\n");
switch(_tile_info.clipping)
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
index 2bbea8725d..47f95b5c40 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.cpp
@@ -88,7 +88,12 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile);
T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile);
+#if defined(IS_BROADCAST)
T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
+#else // !defined(IS_BROADCAST)
+ T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
+#endif // defined(IS_BROADCAST)
+
}
//------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
)_";
@@ -106,7 +111,11 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile);
+#if defined(IS_BROADCAST)
T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
+#else // !defined(IS_BROADCAST)
+ T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
+#endif // defined(IS_BROADCAST)
}
//------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
)_";
@@ -115,16 +124,17 @@ std::string ClElementwiseAddKernelComponent::get_component_code() const
CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const
{
+ const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
CLBuildOptions build_opts{};
- const auto n0 = _blueprint->impl().get_execution_window().x().step();
- const auto m0 = _blueprint->impl().get_execution_window().y().step();
- const auto partial_m0 = t_dst_info->dimension(1) % m0;
+ const auto n0 = _blueprint->impl().get_execution_window().x().step();
+ const auto m0 = _blueprint->impl().get_execution_window().y().step();
+ const bool is_broadcast = t_src_info->tensor_shape() != t_dst_info->tensor_shape();
build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_m0));
+ build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST");
return build_opts;
}