diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-12-13 13:09:10 +0000 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-12-16 15:17:51 +0000 |
commit | b84e25313e5dc7acbc03623e1e071e845047c111 (patch) | |
tree | fbee083f1262017555c64c3280da45e2b638992e /src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | |
parent | a0ae8d2e6c57fd95c0edaf659b9df8b8c540d051 (diff) | |
download | ComputeLibrary-b84e25313e5dc7acbc03623e1e071e845047c111.tar.gz |
Add output operator for dynamic fusion
* The output of the fused operator must be explicitly specified
using GpuOutput operator.
* Any temporary tensors used to connect the output of an operator
to the input of another operator will be marked as no-alloc
and won't be allocated as a tensor in memory.
Resolves: COMPMID-5771
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I5ae8e800f8f737db23a055a92b01c4f1d78c3bb8
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8794
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | 27 |
1 file changed, 0 insertions, 27 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp index c560e9a931..66760b3812 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp @@ -40,7 +40,6 @@ namespace dynamic_fusion GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components) : _services{ services }, _comp_group{ components }, _store_components{} { - add_load_store(); } GpuKernelSourceCode GpuLogicalKernel::write_kernel_code() @@ -57,32 +56,6 @@ GpuKernelSourceCode GpuLogicalKernel::write_kernel_code() return code; } - -void GpuLogicalKernel::add_load_store() -{ - const auto dst_tensors = _comp_group.get_dst_tensors(); - // Each dst tensor from the component group requires exactly one store component - for(const auto &dst_tensor : dst_tensors) - { - ArgumentPack<ITensorInfo> tensors; - // Pass same destination tensor to both source and destination of the store component - // In other words, the addition of a store component does not create a new dst tensor - // This way we avoid the issue of the dst tensor of the component group differs from that of a logical kernel - // This may seem to violate the acyclic-ness of the component graph. 
But it is fine because at the point of - // the construction of the logical kernel, we do not need a graph representation of components anymore - // (the graph has been serialized) - tensors.add_const_tensor(ACL_SRC_0, dst_tensor); - tensors.add_const_tensor(ACL_DST_0, dst_tensor); - - auto store = _services->component_factory().create<ClComponentStore>( - _comp_group.get_root_component()->properties(), // Store component share the same properties as that of the root component - tensors); - _store_components.push_back(std::move(store)); - auto success = _comp_group.add_component(_store_components.back().get()); - ARM_COMPUTE_UNUSED(success); - ARM_COMPUTE_ERROR_ON(!success); // It's guaranteed that any load store insertion should be successful - } -} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute |