From b3077fbaee868579f9a41888fef1f71286d6757c Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Tue, 3 Jan 2023 17:59:14 +0000
Subject: LHS broadcasting addition for dynamic fusion

* Binary elementwise operator now can have broadcasting in either
  X dimension, Y+Z dimension, or both, in either LHS or RHS operand.
* Fix bug in CL code to support batching.

Resolves: COMPMID-5704
Signed-off-by: Viet-Hoa Do
Change-Id: I51b04986d30861f255ca9f754adffa0e6c85a26b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8898
Reviewed-by: SiCong Li
Reviewed-by: Ramy Elgammal
Tested-by: Arm Jenkins
Dynamic-Fusion: Ramy Elgammal
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 .../components/cl/ClComponentElementwiseBinary.cpp | 32 +++++++++++++++++-----
 1 file changed, 25 insertions(+), 7 deletions(-)

(limited to 'src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp')

diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index a17d835ac6..9b006b13ce 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -66,8 +66,30 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &t
     const TensorShape out_shape = TensorShape::broadcast_shape(lhs->tensor_shape(), rhs->tensor_shape());
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0), "Wrong shape for dst.");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG((!rhs_in_place && !lhs_in_place) && detail::have_different_dimensions(lhs->tensor_shape(), dst->tensor_shape(), 0),
-                                    "Only the rhs operand can be broadcast to match the accumulator's (lhs) shape");
+
+    const auto &lhs_shape = lhs->tensor_shape();
+    const auto &rhs_shape = rhs->tensor_shape();
+    const auto &dst_shape = dst->tensor_shape();
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+        detail::have_different_dimensions(lhs_shape, dst_shape, 0) && detail::have_different_dimensions(rhs_shape, dst_shape, 0),
+        "Only LHS or RHS can be broadcasting, not both.");
+
+    // Dimension Y and Z are collapsed together in the current kernel implementation,
+    // hence they cannot be independently broadcast or non-broadcast.
+    // See: ClTemplateElementwiseBinary::get_window
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+        (lhs_shape[1] != dst_shape[1] || rhs_shape[1] != dst_shape[1]) != (lhs_shape[2] != dst_shape[2] || rhs_shape[2] != dst_shape[2]),
+        "Dimension Y and Z must both be either broadcast or non-broadcast.");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+        detail::have_different_dimensions(lhs_shape, dst_shape, 3),
+        "LHS broadcast in dimension 3 or higher is not supported.");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+        detail::have_different_dimensions(rhs_shape, dst_shape, 3),
+        "RHS broadcast in dimension 3 or higher is not supported.");
+
     // Matching data type
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
@@ -76,10 +98,6 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &t
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(lhs, rhs);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(lhs, dst);
 
-    // Batching case not supported yet
-    const size_t idx_batch = get_data_layout_dimension_index(lhs->data_layout(), DataLayoutDimension::BATCHES);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG((lhs->tensor_shape()[idx_batch] != 1) || (rhs->tensor_shape()[idx_batch] != 1) || (dst->tensor_shape()[idx_batch] != 1), "Batching case not supported yet");
-
     // All tensor infos are initialized
     ARM_COMPUTE_RETURN_ERROR_ON(lhs->tensor_shape().total_size() == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(rhs->tensor_shape().total_size() == 0);
--
cgit v1.2.1
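
To see which operand shapes the new validation accepts and rejects, below is a minimal, self-contained C++ sketch, not part of the patch or of ComputeLibrary's API, that mirrors the four broadcast checks added to ClComponentElementwiseBinary::validate. The Shape alias and the differ_from and is_valid_broadcast helpers are hypothetical stand-ins introduced only for this illustration: differ_from plays the role of detail::have_different_dimensions, and dst is assumed to be the already-computed broadcast output shape.

```cpp
// Hedged illustration of the broadcast rules added by this patch.
// Shape, differ_from and is_valid_broadcast are stand-ins, not library API.
#include <array>
#include <cstddef>
#include <iostream>

using Shape = std::array<std::size_t, 4>; // [X, Y, Z, Batch], all sizes >= 1

// True if `a` and `b` differ in any dimension at or above `first_dim`,
// mirroring detail::have_different_dimensions(a, b, first_dim).
bool differ_from(const Shape &a, const Shape &b, std::size_t first_dim)
{
    for(std::size_t d = first_dim; d < a.size(); ++d)
    {
        if(a[d] != b[d])
        {
            return true;
        }
    }
    return false;
}

// Applies the same four checks the patch adds to validate().
bool is_valid_broadcast(const Shape &lhs, const Shape &rhs, const Shape &dst)
{
    // Check 1: only LHS or RHS may broadcast, not both.
    if(differ_from(lhs, dst, 0) && differ_from(rhs, dst, 0))
    {
        return false;
    }
    // Check 2: Y and Z are collapsed in the kernel window, so they must be
    // broadcast (or not broadcast) together; a mismatch is rejected.
    const bool y_bcast = (lhs[1] != dst[1]) || (rhs[1] != dst[1]);
    const bool z_bcast = (lhs[2] != dst[2]) || (rhs[2] != dst[2]);
    if(y_bcast != z_bcast)
    {
        return false;
    }
    // Checks 3 and 4: no broadcast in dimension 3 or higher, on either side.
    if(differ_from(lhs, dst, 3) || differ_from(rhs, dst, 3))
    {
        return false;
    }
    return true;
}

int main()
{
    // LHS broadcast in X only: accepted (the new capability in this patch).
    std::cout << is_valid_broadcast({1, 4, 2, 3}, {8, 4, 2, 3}, {8, 4, 2, 3}) << '\n'; // 1
    // Y broadcast without Z: rejected, Y and Z must broadcast together.
    std::cout << is_valid_broadcast({8, 1, 2, 3}, {8, 4, 2, 3}, {8, 4, 2, 3}) << '\n'; // 0
    // Both operands broadcasting: rejected.
    std::cout << is_valid_broadcast({1, 4, 2, 3}, {8, 1, 1, 3}, {8, 4, 2, 3}) << '\n'; // 0
}
```

The exclusive-or in check 2 follows from the kernel design the patch comments describe: ClTemplateElementwiseBinary::get_window collapses Y and Z into a single window dimension, so a shape of {8, 1, 1, 3} against a dst of {8, 4, 2, 3} (Y and Z both broadcast) is fine, while broadcasting only one of the two is not representable.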