aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
diff options
context:
space:
mode:
authorViet-Hoa Do <viet-hoa.do@arm.com>2022-12-13 13:09:10 +0000
committerViet-Hoa Do <viet-hoa.do@arm.com>2022-12-16 15:17:51 +0000
commitb84e25313e5dc7acbc03623e1e071e845047c111 (patch)
treefbee083f1262017555c64c3280da45e2b638992e /src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
parenta0ae8d2e6c57fd95c0edaf659b9df8b8c540d051 (diff)
downloadComputeLibrary-b84e25313e5dc7acbc03623e1e071e845047c111.tar.gz
Add output operator for dynamic fusion
* The output of the fused operator must be explicitly specified using GpuOutput operator. * Any temporary tensors used to connect the output of an operator to the input of another operator will be marked as no-alloc and won't be allocated as a tensor in the memory. Resolves: COMPMID-5771 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I5ae8e800f8f737db23a055a92b01c4f1d78c3bb8 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8794 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp')
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp7
1 files changed, 6 insertions, 1 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
index cb643a741d..0afd0e7581 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -179,7 +179,11 @@ std::string ClTemplateWriter::write_code()
code += macros;
}
- code += write_kernel_signature(_vtable.get_variable_list(_components.get_argument_tensors()));
+ auto arguments = _components.get_argument_tensors();
+ std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo *l, const ITensorInfo *r) {
+ return l->id() < r->id();
+ });
+ code += write_kernel_signature(_vtable.get_variable_list(arguments));
code += "\n{\n\n";
@@ -190,6 +194,7 @@ std::string ClTemplateWriter::write_code()
for(const auto &component_code : component_codes)
{
code += component_code;
+ code += "\n";
}
code += "}\n";