aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl
diff options
context:
space:
mode:
author Jakub Sujak <jakub.sujak@arm.com> 2023-01-05 14:24:13 +0000
committer Jakub Sujak <jakub.sujak@arm.com> 2023-01-31 15:16:25 +0000
commit 7359a87040c7a901619de21701f540dd5a9a960c (patch)
tree d0eb7bd77c4fec13562c7a97a207b19d5882d4e1 /src/dynamic_fusion/sketch/gpu/template_writer/cl
parent e0c42ef651709fd284da3bedd2c98d420bd6fd1a (diff)
download ComputeLibrary-7359a87040c7a901619de21701f540dd5a9a960c.tar.gz
Add Multiplication operator (FP only) to Dynamic Fusion Interface
Note: we use a separate test fixture for the Multiplication op instead of reusing ElementwiseBinaryFixture. Reusing it would require exposing the internal enum ElementwiseOp in the public utils/TypePrinters.h, because the data test case macros need a printer for it in order to print the test data. We also decided against modifying the enum ArithmeticOp in the standard interface to include MUL, since it would have no implementation there. Future work should consider refactoring this test fixture into ElementwiseBinaryFixture to reduce the total number of fixtures and the code duplication.

Resolves: COMPMID-5779
Change-Id: I84207658ce0407095b028fca0ab7bfa2950255ec
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9013
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl')
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp | 29
1 file changed, 15 insertions, 14 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
index 01017ed909..0dd7ca5e78 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
@@ -68,7 +68,7 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup
code =
R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
+ //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
if(is_root)
@@ -139,7 +139,7 @@ R"_(
code +=
R"_(
}
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
+ //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
return code;
@@ -168,33 +168,34 @@ void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtab
TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- TagLUT lut{};
+ TagLUT lut{};
// Local build options
lut["meta_kernel_id"] = id();
lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type());
// Arguments and global shared variables
- lut["lhs"] = vtable.get_variable(_lhs);
- lut["rhs"] = vtable.get_variable(_rhs);
- lut["dst"] = vtable.get_variable(_dst);
+ lut["lhs"] = vtable.get_variable(_lhs);
+ lut["rhs"] = vtable.get_variable(_rhs);
+ lut["dst"] = vtable.get_variable(_dst);
lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
switch(_attributes.operation())
{
- case Attributes::ElementwiseOp::ADD:
+ case Attributes::ElementwiseOp::Add:
lut["ELTWISE_OP"] = "ADD";
break;
+ case Attributes::ElementwiseOp::Mul:
+ lut["ELTWISE_OP"] = "MUL";
+ break;
default:
ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
}
ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_lhs) &&
- detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
+ comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_rhs) &&
- detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
+ comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
// Set broadcast parameters
// PRE: All tensors are broadcast-compatible
@@ -222,9 +223,9 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0";
lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
- lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
- (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
- "";
+ lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
+ (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
+ "";
return lut;
}