aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
diff options
context:
space:
mode:
authorRamy Elgammal <ramelg01@e129512.arm.com>2023-01-11 18:48:04 +0000
committerRamy Elgammal <ramy.elgammal@arm.com>2023-01-25 10:24:23 +0000
commit002e6530f6218b00a28aef9be8b21efb08cf3602 (patch)
treef3e2f9d064b985ffe283512825b34cdd59f29f50 /src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
parentcc2877368d5e15d9ea89d31c84ec651fc0fffd13 (diff)
downloadComputeLibrary-002e6530f6218b00a28aef9be8b21efb08cf3602.tar.gz
Implement dynamic fusion softmax operator
- Return aux tensorInfo by get_aux_tensors() at runtime to init the aux tensor with the right size. - Keep softmax unfusable for this commit - Hence, added Tensor3D to template writer arguments declaration, for sake of keeping dynamic fusion softmax componenets' kernels matching their cl counterparts. Resolves: COMPMID-5523 Change-Id: I667f39545db925f667036ef448302c79a0330373 Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com> Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/483924 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8986 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp')
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp9
1 files changed, 7 insertions, 2 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index 4cf7a7fece..b70a192775 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -59,8 +59,13 @@ GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap
{
const auto component = _components.at(op.op).get();
const auto success = stream.add_component(component);
- ARM_COMPUTE_ERROR_ON(!success);
- ARM_COMPUTE_UNUSED(success);
+ if(!success) // Assume first failure was because the root component is unfusable
+ {
+ stream.new_component_group();
+ const auto success = stream.add_component(component);
+ ARM_COMPUTE_ERROR_ON(!success);
+ ARM_COMPUTE_UNUSED(success);
+ }
}
return stream;