From 04f4620cf999846a44089c81720aa920edec6993 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 14 Dec 2022 14:49:56 +0000 Subject: Add multiple output support for dynamic fusion * The dependency graph can now schedule any acyclic graph into a sequential list of operators. This is needed as the output operators now form branches in the graph. * Fix the definition of input, output and intermediate tensors in GpuKernelComponentGroup to support a non-linear but sequential list of operators. * Add a constraint on GpuOperatorGroup to enforce a strictly linear fusion style, but allow output operators as the only form of branch. Resolves: COMPMID-5771 Signed-off-by: Viet-Hoa Do Change-Id: I68de3a31a2456145081f0a397e4e61dd66327682 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8823 Reviewed-by: Gunes Bayir Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp') diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp index aac84b6c59..8f4eadc477 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp @@ -44,6 +44,8 @@ GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code() // Traverse through component groups and assemble workload together for(auto && group : _component_groups) { + group.finalize(); + // Write kernel code GpuLogicalKernel logical_kernel(_services, group); const GpuKernelSourceCode kernel_code = logical_kernel.write_kernel_code(); -- cgit v1.2.1