From 404462af4ca002ece819161a03a4bdb19a87abf2 Mon Sep 17 00:00:00 2001
From: Ramy Elgammal
Date: Tue, 8 Nov 2022 02:14:46 +0000
Subject: Adding GpuAdd to dynamic fusion operators

- Provide support for Add operator
- Auto initialize the destination tensor before testing fusion in conv2d and elementwise binary ops.

Resolves: COMPMID-5518

Signed-off-by: Ramy Elgammal
Change-Id: Ibd815020f02b57f88eea7c2921bdcf98605d99c5
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8617
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Benchmark: Arm Jenkins
---
 .../sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp')

diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index 7ad7dd69f0..75e812af9f 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -240,7 +240,7 @@ void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, c
     }
     vtable.declare_variable(
         _dst,
-        GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+        GpuKernelArgumentInfo(common_tensor_type),
         comp_group.is_intermediate_tensor(_dst),
         "dst");
 }
@@ -305,7 +305,7 @@ CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &c
     const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
     const DataType data_type = _src->data_type();
 
-    /// NOTE: For now tile sizes (n0, m0, n0) are set by the execution window. This may change in the future
+    /// NOTE: For now tile sizes (n0, m0, k0) are set by the execution window. This may change in the future
     const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
     const unsigned int n0 = root_window.x().step();
     const unsigned int m0 = root_window.y().step();
-- 
cgit v1.2.1