diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-12-14 14:49:56 +0000 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-12-23 14:11:34 +0000 |
commit | 04f4620cf999846a44089c81720aa920edec6993 (patch) | |
tree | 1c0080ac59d5b2aa500cd2b2ceffe0575e22a4b6 /src/dynamic_fusion/sketch/gpu/template_writer | |
parent | 81fdaddaf36cb4c7ff0d2c52a370dd977a13dc72 (diff) | |
download | ComputeLibrary-04f4620cf999846a44089c81720aa920edec6993.tar.gz |
Add multiple output support for dynamic fusion
* The dependency graph now can schedule any acyclic graph into
a sequential list of operators. This is needed as the output
operators now form branches in the graph.
* Fix the definition of input, output and intermediate tensors
in GpuKernelComponentGroup to support non-linear but sequential
list of operators.
* Add constraint on GpuOperatorGroup to enforce strictly linear
fusion style, but allow output operator as the only form of
branch.
Resolves: COMPMID-5771
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I68de3a31a2456145081f0a397e4e61dd66327682
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8823
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer')
7 files changed, 8 insertions, 10 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp index c3128ea552..8adf056912 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp @@ -125,7 +125,7 @@ TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, c lut["src"] = vtable.get_variable(_src); lut["dst"] = vtable.get_variable(_dst); - const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]); + const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); lut["arg_dst"] = dst_argument.uniq_name; // Local build options diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp index 1ac49406a8..6ab3a68bb0 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp @@ -137,7 +137,7 @@ TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const C lut["src"] = vtable.get_variable(_src); lut["dst"] = vtable.get_variable(_dst); - const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]); + const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); lut["arg_dst"] = dst_argument.uniq_name; // Local build options diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp index 389bd5c65f..6fa77aafe3 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp @@ -251,7 +251,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab } lut["dst"] = vtable.get_variable(_dst); - const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]); + const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); lut["arg_dst"] = dst_argument.uniq_name; // Local build options diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp index aa324ffb54..221addb7b5 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp @@ -268,7 +268,7 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, } lut["dst"] = vtable.get_variable(_dst); - const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]); + const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); lut["arg_dst"] = dst_argument.uniq_name; // Local build options diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp index 6c1e0fb1de..39cec6e31c 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp @@ -194,7 +194,7 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt lut["lhs"] = vtable.get_variable(_lhs); lut["rhs"] = vtable.get_variable(_rhs); lut["dst"] = vtable.get_variable(_dst); - lut["out"] = vtable.get_variable(comp_group.get_dst_tensors().front()); + lut["out"] = vtable.get_variable(comp_group.get_any_dst_tensor()); } else { diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp index e4b662b3a8..ef4f2f22a1 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp @@ -84,9 +84,9 @@ TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const // Local build options lut["meta_kernel_id"] = id(); lut["DST_TENSOR_TYPE"] = "BUFFER"; - const auto dst_info = comp_group.get_dst_tensors()[0]; - lut["DST_DATA_TYPE"] = dst_info->data_type(); + lut["DST_DATA_TYPE"] = _dst->data_type(); + ARM_COMPUTE_UNUSED(comp_group); return lut; } diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp index 0afd0e7581..eed481f109 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp @@ -203,9 +203,7 @@ std::string ClTemplateWriter::write_code() } std::string ClTemplateWriter::write_global_section() const { - const auto dst_tensors = _components.get_dst_tensors(); - ARM_COMPUTE_ERROR_ON_MSG(dst_tensors.size() != 1, "Only one destination tensor per kernel is allowed"); - const auto dst_info = dst_tensors[0]; + const auto dst_info = _components.get_any_dst_tensor(); const auto dst_w = dst_info->dimension(0); const auto tile_w = std::max(1, get_window().x().step()); const auto tile_h = std::max(1, get_window().y().step()); |