aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author SiCong Li <sicong.li@arm.com> 2022-11-09 15:57:48 +0000
committer SiCong Li <sicong.li@arm.com> 2022-11-22 14:09:34 +0000
commit 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 (patch)
tree e75a132b8b5fd21cbceec8d0aa88da893e9c4f43 /arm_compute
parent 73bb6b7ad80801e56633ad4ea12b0404b586a979 (diff)
download ComputeLibrary-31df05a1870662a7288fbaeb6fbc7fc458bb5a73.tar.gz
Remove dynamic fusion prototype with tests and examples
Public headers of the new experimental dynamic fusion can be found in arm_compute/dynamic_fusion/
New examples on how to use the interface can be found in tests/validation/dynamic_fusion/gpu/Integration.cpp

Resolves COMPMID-5683

Change-Id: I7ccb902a227fb487562df15fc3c30118d1d95bbd
Signed-off-by: SiCong Li <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8671
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/experimental/ClWorkload.h 219
-rw-r--r-- arm_compute/core/experimental/DependencyGraph.h 277
-rw-r--r-- arm_compute/core/experimental/IWorkload.h 132
-rw-r--r-- arm_compute/core/experimental/OperatorGraph.h 217
-rw-r--r-- arm_compute/runtime/CL/CLScheduler.h 31
-rw-r--r-- arm_compute/runtime/CL/CLTuner.h 4
-rw-r--r-- arm_compute/runtime/CL/ICLTuner.h 18
-rw-r--r-- arm_compute/runtime/experimental/ClCompositeOperator.h 190
8 files changed, 0 insertions, 1088 deletions
diff --git a/arm_compute/core/experimental/ClWorkload.h b/arm_compute/core/experimental/ClWorkload.h
deleted file mode 100644
index 9b2040a046..0000000000
--- a/arm_compute/core/experimental/ClWorkload.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/Window.h"
-
-#include "arm_compute/core/experimental/IWorkload.h"
-#include "arm_compute/core/experimental/OperatorGraph.h"
-
-#include <map>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Verbose and explicit way to enumerate all the tensor arguments variants used by
- * all kernel implementations. This avoids any ambiguity in what kernel arguments are passed
- */
-enum class ClKernelTensorArgType : int
-{
- Scalar,
-
- Vector,
-
- Image,
- Image_Reinterpret_As_3D,
- Image_Export_To_ClImage2D,
-
- Image_3D, // 3D Tensor represented as a 2D Image + stride_z
- Image_3D_Export_To_ClImage2D,
-
- Tensor_3D,
- Tensor_4D,
- Tensor_4D_t_Buffer,
- Tensor_4D_t_Image
-};
-
-/** Describes all the info required to add a kernel argument at run time
- *
- * @note This struct can later be expanded into a more concise and formal way to specify how to set up
- * arguments for a kernel inside a @ref ClUnitWorkload
- */
-struct ClKernelArgDescriptor
-{
- ClKernelArgDescriptor() = default;
- ClKernelArgDescriptor(int arg_id, ClKernelTensorArgType type, bool slide_along_dimz = true)
- : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz }
- {
- }
- ~ClKernelArgDescriptor() = default;
- friend bool operator==(const ClKernelArgDescriptor &arg0, const ClKernelArgDescriptor &arg1)
- {
- return (arg0.tensor_arg_type == arg1.tensor_arg_type) && (arg0.slide_along_dimz == arg1.slide_along_dimz);
- }
- int arg_id{ -1 }; /**< Arg ID in the blueprint, -1 means empty / uninitialized */
- ClKernelTensorArgType tensor_arg_type{ ClKernelTensorArgType::Image }; /**< tensor argument type */
- bool slide_along_dimz{ true }; /**< @note slide_along_dimz will be moved out of this descriptor in later iterations */
-};
-
-using ClKernelArgList = std::map<int, ClKernelArgDescriptor>;
-
-/** Descriptor containing information required to run a single ClWorkload
- */
-struct ClExecutionDescriptor
-{
- cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */
- cl::NDRange gws{}; /**< Global work-group to be used */
- bool skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */
-};
-
-/** Contains kernel code to be compiled and run in a ClUnitWorkload
- */
-struct ClKernelCode
-{
- friend bool operator==(const ClKernelCode &code0, const ClKernelCode &code1)
- {
- return (code0.name == code1.name) && (code0.code == code1.code) && (code0.config_id == code1.config_id) && (code0.build_options == code1.build_options) && (code0.window == code1.window)
- && (code0.arguments == code1.arguments);
- }
- std::string name{}; /**< Kernel name */
- std::string code{}; /**< Kernel source code */
- std::string config_id{}; /**< Generated from blueprint based on complex component */
- CLBuildOptions build_options{}; /**< Kernel build options */
- Window window{}; /**< Execution window */
- ClKernelArgList arguments{}; /**< Kernel argument descriptors. map key is kernel ArgumentID */
-};
-
-/** A descriptor of ClWorkload Tensors.
- */
-struct ClWorkloadTensor : public WorkloadTensor
-{
- ClWorkloadTensor() = default;
- ClWorkloadTensor(Id id, ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg)
- : WorkloadTensor{ id, info, memory_type, memory_info }, kernel_arg{ kernel_arg }
- {
- }
- ClKernelArgDescriptor kernel_arg{};
- friend bool operator==(const ClWorkloadTensor &t0, const ClWorkloadTensor &t1)
- {
- return t0.info == t1.info && t0.memory_info == t1.memory_info && t0.memory_type == t1.memory_type && t0.kernel_arg == t1.kernel_arg;
- }
-};
-
-/** The basic atomic unit in a @ref ClWorkload. It contains exactly one kernel to run.
- */
-struct ClUnitWorkload : public UnitWorkload
-{
- ClUnitWorkload() = default;
- ClUnitWorkload(Id id, UnitWorkloadStage stage, const ClKernelCode &code)
- : UnitWorkload{ id, stage }, code{ code }
- {
- }
- friend bool operator==(const ClUnitWorkload &uworkload0, const ClUnitWorkload &uworkload1)
- {
- return uworkload0.stage == uworkload1.stage && uworkload0.code == uworkload1.code;
- }
- ClKernelCode code{};
-};
-
-/** GPU information for @ref ClWorkloadContext
- */
-struct GpuInfo
-{
- friend bool operator==(const GpuInfo &info0, const GpuInfo &info1)
- {
- return info0.target == info1.target;
- }
- GPUTarget target{ GPUTarget::UNKNOWN };
-};
-
-/** Context (device capabilities, platform details) associated with a ClWorkload
- *
- * It is required for building the @ref ClKernelCode and could also be used by the runtime (e.g. schedulers)
- */
-struct ClWorkloadContext
-{
- friend bool operator==(const ClWorkloadContext &ctx0, const ClWorkloadContext &ctx1)
- {
- return ctx0.gpu_info == ctx1.gpu_info;
- }
- GpuInfo gpu_info{};
-};
-
-/** Workload for Cl backend
- */
-struct ClWorkload : public IWorkload
-{
- Tid add_workload_tensor(ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg, Tid merge_point)
- {
- Tid id = graph.add_tensor(merge_point);
- if(tensors.find(id) == tensors.end())
- {
- tensors[id] = ClWorkloadTensor(id, info, memory_type, memory_info, kernel_arg);
- }
- return id;
- }
- UnitWorkId add_unit_workload(UnitWorkloadStage stage, const ClKernelCode &code, const std::vector<Tid> &inputs, const std::vector<Tid> &outputs)
- {
- auto op = graph.add_operator(inputs, outputs);
- auto id = op.second;
- unit_workloads[id] = ClUnitWorkload(id, stage, code);
- return id;
- }
- friend bool operator==(const ClWorkload &workload0, const ClWorkload &workload1)
- {
- return std::make_tuple(
- workload0.graph, workload0.context, workload0.unit_workloads, workload0.tensors, workload0.op_tensor_id_lut)
- == std::make_tuple(
- workload1.graph, workload1.context, workload1.unit_workloads, workload1.tensors, workload1.op_tensor_id_lut);
- }
- ClWorkloadContext context{}; /**< Workload context*/
- std::map<UnitWorkId, ClUnitWorkload> unit_workloads{}; /**< Unit workloads to run*/
- std::map<Tid, ClWorkloadTensor> tensors{}; /**< Workload tensors*/
- std::map<Tid, OpTensor::Id> op_tensor_id_lut{}; /**< Map from ClWorkloadTensor to SRC and DST Operator Tensors (no need to store "intermediate" Operator Tensors)*/
- Status status{}; /**< For compatibility with the IOperator validate method. Store if the workload is valid or not. */
-};
-
-/** Build a @ref ClWorkload from an @ref OperatorGraph.
- *
- * @param[out] workload
- * @param[in] op_graph
- * @param[in] ctx
- * @return Status
- */
-Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file
diff --git a/arm_compute/core/experimental/DependencyGraph.h b/arm_compute/core/experimental/DependencyGraph.h
deleted file mode 100644
index e0d6ff9ba9..0000000000
--- a/arm_compute/core/experimental/DependencyGraph.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H
-
-#include "arm_compute/core/Error.h"
-
-#include <algorithm>
-#include <map>
-#include <vector>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-template <typename T>
-bool is_in(const T &v, const std::vector<T> &vec)
-{
- return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
-}
-
-/** The dependency graph of a workload, where the nodes are of 2 types: Tensor or Operator
- * Represented as a doubly-linked adjacency list with the differentiation between source and destination
- *
- * A "Merge Tensor" is an external tensor associated with the tensor within the graph, and serves as a merge point
- */
-class DependencyGraph
-{
-public:
- /** A serial Id allocator
- *
- */
- class SerialIdAllocator
- {
- public:
- using Id = int;
- Id alloc()
- {
- return _counter++;
- }
- constexpr static Id empty()
- {
- return -1;
- }
-
- private:
- Id _counter{ 0 };
- };
- using Id = SerialIdAllocator::Id;
- /** Adjacency list
- *
- */
- using AdjList = std::map<Id, std::vector<Id>>;
-
- /** An operator bundled with its input and output tensors, used when traversing the graph in topological order
- *
- */
- struct OpPack
- {
- Id op{};
- std::vector<Id> inputs{};
- std::vector<Id> outputs{};
- friend bool operator==(const OpPack &opp0, const OpPack &opp1)
- {
- return std::make_tuple(
- opp0.op, opp0.inputs, opp0.outputs)
- == std::make_tuple(
- opp1.op, opp1.inputs, opp1.outputs);
- }
- };
-
-public:
- constexpr static Id empty_id()
- {
- return SerialIdAllocator::empty();
- }
-
- DependencyGraph() = default;
- // Used in cases where two DependencyGraphs may want to share the same configuration of tensors
- explicit DependencyGraph(const std::vector<Id> &imported_tensors);
- // Testing only
- DependencyGraph(const AdjList &adj_src_tensors, const AdjList &adj_dst_tensors, const AdjList &adj_src_ops, const AdjList &adj_dst_ops, std::map<Id, Id> merge_points = {});
-
- /** Add a new tensor
- *
- * @param merge_tensor The external merge point associated with the tensor. Leave empty if not needed.
- * @return Id The newly allocated tensor, or a previously added tensor associated with @p merge_tensor
- */
- Id add_tensor(Id merge_tensor = empty_id());
-
- void remove_tensor(Id tensor);
-
- /** Add a new operator
- *
- * @param inputs Input tensors to the operator
- * @param outputs Output tensors to the operator
- * @return std::pair<Status, DependencyGraph::Id> where id is the newly allocated operator
- */
- std::pair<Status, DependencyGraph::Id> add_operator(const std::vector<Id> &inputs, const std::vector<Id> &outputs);
-
- void remove_operator(Id op);
- /** Sort the graph in a topological order
- *
- * @return std::pair<Status, std::vector<OpPack>>
- */
- std::pair<Status, std::vector<OpPack>> topological_sort() const;
-
- std::vector<Id> src_ops(Id op) const;
- std::vector<Id> dst_ops(Id op) const;
-
- std::vector<Id> src_ops_from_tensor(Id tensor) const;
- std::vector<Id> dst_ops_from_tensor(Id tensor) const;
- /** Get the merge points object
- *
- * @return std::map<Id, Id>
- */
- std::map<Id, Id> get_merge_points() const;
- /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph
- *
- * @return std::vector<Id>
- */
- std::vector<Id> get_root_ops() const;
- /** Get all dst ops of the whole graph
- *
- * @return std::vector<Id>
- */
- std::vector<Id> get_dst_ops() const;
-
- /** Get source tensors to an operator
- *
- * @param op
- * @return std::vector<Id>
- */
- std::vector<Id> src_tensors(Id op) const;
- /** Get destination tensors to an operator
- *
- * @param op
- * @return std::vector<Id>
- */
- std::vector<Id> dst_tensors(Id op) const;
- /** Get source tensors of the whole graph
- *
- * @return std::vector<Id>
- */
- std::vector<Id> src_tensors() const;
- /** Get destination tensors of the whole graph
- *
- * @return std::vector<Id>
- */
- std::vector<Id> dst_tensors() const;
- /** Get all operators
- *
- * @return std::vector<Id>
- */
- std::vector<Id> all_ops() const;
- /** Get all tensors
- *
- * @return std::vector<Id>
- */
- std::vector<Id> all_tensors() const;
- /** Number of operators
- *
- * @return unsigned int
- */
- unsigned int number_of_ops() const;
- /** Number of tensors
- *
- * @return unsigned int
- */
- unsigned int number_of_tensors() const;
-
- /** Update @p merge_point to point to @p t_id
- *
- * @param t_id
- * @param merge_point
- */
- Status update_merge_point(Id t_id, Id merge_point);
-
- /** Strict equality comparison (all internal ids and order of insertion matter).
- * In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal
- *
- *
- * @param g0
- * @param g1
- * @return true
- * @return false
- */
- friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
- {
- // Do not compare id allocators
- return std::make_tuple(
- g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops, g0._merge_to_internal)
- == std::make_tuple(
- g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops, g1._merge_to_internal);
- }
- void link_input(Id op, Id in_tensor);
- void link_output(Id op, Id out_tensor);
- /** Check if there's a path from @p src_tensor to @p dst_op
- *
- * @param src_tensor
- * @param dst_op
- * @return true
- * @return false
- */
- bool path_exists_from_tensor_to_op(Id src_tensor, Id dst_op) const;
- /** Check if there's a path from @p src_op to @p dst_op
- *
- * @param src_op
- * @param dst_op
- * @return true
- * @return false
- */
- bool path_exists_from_op_to_op(Id src_op, Id dst_op) const;
- /** Check if tensor is the src tensor of the entire graph
- *
- * @param tensor
- * @return true
- * @return false
- */
- bool is_src_tensor(Id tensor) const;
- /** Check if tensor is the dst tensor of the entire graph
- *
- * @param tensor
- * @return true
- * @return false
- */
- bool is_dst_tensor(Id tensor) const;
-
-private:
- Id insert_new_tensor();
- Id insert_new_op();
- bool tensor_exists(Id tensor) const;
- bool operator_exists(Id op) const;
- bool is_src_tensor_of(Id op, Id tensor) const;
- bool is_dst_tensor_of(Id op, Id tensor) const;
- bool are_connected(Id op, Id tensor) const;
-
-private:
- AdjList _adj_src_tensors{};
- AdjList _adj_dst_tensors{};
- AdjList _adj_src_ops{};
- AdjList _adj_dst_ops{};
- std::map<Id, Id> _merge_to_internal{}; // From merge tensor to internal tensor
- SerialIdAllocator _operator_id{};
- SerialIdAllocator _tensor_id{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file
diff --git a/arm_compute/core/experimental/IWorkload.h b/arm_compute/core/experimental/IWorkload.h
deleted file mode 100644
index 54855c1084..0000000000
--- a/arm_compute/core/experimental/IWorkload.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/core/experimental/Types.h"
-
-#include "arm_compute/core/experimental/DependencyGraph.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Describes when a Unit Workload is run.
- *
- */
-struct UnitWorkloadStage
-{
- enum class Stage
- {
- Prepare, /**< Only run once at the beginning. */
- Run, /**< Run every time after the first time. */
- };
- Stage stage;
- friend bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
- {
- return stage0.stage == stage1.stage;
- }
-};
-/** Type of memory used by a Workload Tensor
- *
- */
-enum class MemoryType
-{
- Core = 0, /**< Core memory used by the Workload Tensor, e.g. for argument tensors */
- Auxiliary = 1, /**< Auxiliary memory required by the Workload Tensor, e.g. for temporary tensors */
-};
-
-using AuxMemoryLifetime = MemoryLifetime;
-
-/** Memory Info for a @ref WorkloadTensor of Auxiliary memory type. This communicates to the user how much additional
- * memory is required for auxiliary tensors
- */
-struct AuxMemoryInfo
-{
- AuxMemoryInfo() = default;
-
- AuxMemoryInfo(size_t size, size_t alignment = 0) noexcept
- : size(size),
- alignment(alignment)
- {
- }
-
- AuxMemoryInfo(AuxMemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept
- : lifetime(lifetime),
- size(size),
- alignment(alignment)
- {
- }
- friend bool operator==(const AuxMemoryInfo &info0, const AuxMemoryInfo &info1)
- {
- return info0.lifetime == info1.lifetime && info0.size == info1.size && info0.alignment == info1.alignment;
- }
-
- AuxMemoryLifetime lifetime{ AuxMemoryLifetime::Temporary }; /**< Memory lifetime*/
- size_t size{ 0 }; /**< Total memory size in bytes */
- size_t alignment{ 64 }; /**< Memory alignment in bytes */
-};
-
-/** A descriptor for IWorkload Tensors.
- */
-struct WorkloadTensor
-{
- using Id = DependencyGraph::Id;
- Id id{}; /**< Id of the workload tensor */
- ITensorInfo *info{}; /**< TensorInfo associated with the workload tensor */
- MemoryType memory_type{}; /**< Memory type */
- AuxMemoryInfo memory_info{}; /**< Auxiliary memory information. This can be ignored if the memory type is Core */
-};
-/** The basic atomic unit in an @ref IWorkload. It contains exactly one kernel to run.
- *
- */
-struct UnitWorkload
-{
- using Id = DependencyGraph::Id;
- Id id{}; /**< Id of the unit workload */
- UnitWorkloadStage stage{}; /**< Stage */
-};
-
-/** Run-time-agnostic, platform-specific graph that describes everything required to run a workload
- * It can be configured into an Arm Compute Library runtime, integrated into the runtime of another framework, or integrated into the compilation flow
- */
-struct IWorkload
-{
- using UnitWorkId = UnitWorkload::Id;
- using Tid = WorkloadTensor::Id;
- IWorkload() = default;
- virtual ~IWorkload() = default;
- DependencyGraph graph{}; /**< Dependency graph of the workload tensors and the unit workloads */
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file
diff --git a/arm_compute/core/experimental/OperatorGraph.h b/arm_compute/core/experimental/OperatorGraph.h
deleted file mode 100644
index f40ad0d8c5..0000000000
--- a/arm_compute/core/experimental/OperatorGraph.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/ITensorInfo.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Graph of operators to execute within a Workload. This is a pure descriptive construct.
- */
-class OperatorGraph final
-{
-public:
- struct Implementation;
- OperatorGraph();
- ~OperatorGraph();
-
-public:
- Implementation *impl();
- const Implementation *impl() const;
-
-private:
- std::unique_ptr<Implementation> _impl;
-};
-
-/** Return the validity of @p op_graph, usually after performing an operation (e.g. add_tensor) on it
- *
- * @param[in] op_graph OperatorGraph to be validated
- *
- * @return Status
- */
-Status validate(const OperatorGraph &op_graph);
-
-/** Operator Tensor Handle
- * This can be either an argument tensor, or an intermediate tensor linking 2 @ref Operator s
- */
-class OpTensor final
-{
-public:
- using Id = int;
- OpTensor(Id id = {});
- /** Id of the OpTensor
- * @return Id
- */
- Id id() const;
-
-private:
- Id _id{};
-};
-
-/** Provide order of @ref OpTensor by checking if @p t0 is "lower than" @p t1
- *
- * @param[in] t0 OpTensor
- * @param[in] t1 OpTensor
- *
- * @return true if @p t0 is lower than @p t1
- * @return false otherwise
- */
-bool operator<(const OpTensor &t0, const OpTensor &t1);
-
-/** Associate a TensorInfo with a newly created @ref OpTensor in the @p graph.
- *
- * @note @p info needs to remain in scope and valid until the workload has finished building
- * @note Can pass in an empty TensorInfo for a destination Tensor, in which case @p info will be inferred from the source tensors
- *
- * @param[in,out] graph OperatorGraph where the tensor is added
- * @param[in] info TensorInfo to be associated
- *
- * @return OpTensor
- */
-OpTensor add_tensor(OperatorGraph &graph, ITensorInfo &info);
-
-/** Operator Handle
- * This can be used to further modify an existing operator
- */
-class Operator final
-{
-public:
- using Id = int;
- Operator(Id id = {});
- /** Id of the Operator
- * @return Id
- */
- Id id() const;
-
-private:
- Id _id{};
-};
-
-/** Provide order of @ref Operator by checking if @p op0 is "lower than" @p op1
- *
- * @param[in] op0 Operator
- * @param[in] op1 Operator
- *
- * @return true if @p op0 is lower than @p op1
- * @return false otherwise
- */
-bool operator<(const Operator &op0, const Operator &op1);
-
-/** Descriptor for Conv2d operation
- */
-struct Conv2dDescriptor
-{
- /* TOSA compliant attribute parameters start */
- Padding2D pad{};
- Size2D stride{ 1U, 1U };
- Size2D dilation{ 1U, 1U };
- /* TOSA compliant attribute parameters end */
- /* Non-TOSA compliant attribute parameters start */
- /* Non-TOSA compliant attribute parameters end */
-};
-/** Add op Conv2d to @p graph
- *
- * @param[in,out] graph OperatorGraph where the operator is added to
- * @param[in] desc Operator descriptor
- * @param[in] input Input OpTensor
- * @param[in] weights Weights OpTensor
- * @param[in] bias (Optional) bias OpTensor
- * @param[in] dst Destination OpTensor
- *
- * @return Operator
- */
-Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor bias, OpTensor dst);
-Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor dst);
-/** (Only for Debugging and Testing) Force a conv2d method
- *
- * @param[in,out] graph OperatorGraph where conv2d op is located
- * @param[in] conv2d Conv2d Op
- * @param[in] method Forced ConvolutionMethod
- */
-void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMethod method);
-
-/** Descriptor for Elementwise binary operation
- *
- */
-struct ElementwiseDescriptor
-{
- /* TOSA compliant attribute parameters start */
- /* TOSA compliant attribute parameters end */
- /* Non-TOSA compliant attribute parameters start */
- ArithmeticOperation op;
- /* Non-TOSA compliant attribute parameters end */
-};
-/** Add op Elementwise to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s
- *
- * @param[in,out] graph OperatorGraph where the operator is added to
- * @param[in] desc Operator descriptor
- * @param[in] lhs Lhs OpTensor
- * @param[in] rhs Rhs OpTensor
- * @param[in] dst Destination OpTensor
- *
- * @return Operator
- */
-Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst);
-
-/** Descriptor for Floor operation
- *
- */
-struct FloorDescriptor
-{
- /* TOSA compliant attribute parameters start */
- /* TOSA compliant attribute parameters end */
- /* Non-TOSA compliant attribute parameters start */
- /* Non-TOSA compliant attribute parameters end */
-};
-/** Add op Floor to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s
- *
- * @param[in,out] graph OperatorGraph where the operator is added to
- * @param[in] desc Operator descriptor
- * @param[in] src Source OpTensor
- * @param[in] dst Destination OpTensor
- *
- * @return Operator
- */
-Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst);
-
-bool operator==(const OpTensor &t0, const OpTensor &t1);
-bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1);
-bool operator==(const ElementwiseDescriptor &, const ElementwiseDescriptor &);
-bool operator==(const FloorDescriptor &, const FloorDescriptor &);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 3919635d1b..3030239270 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -35,19 +35,6 @@
#include "arm_compute/runtime/CL/CLTypes.h"
#include "arm_compute/runtime/CL/ICLTuner.h"
-#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClExecutionDescriptor;
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
-
namespace arm_compute
{
class ICLKernel;
@@ -108,20 +95,6 @@ public:
* @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
*/
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush = true);
-
-#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
-
- /** Schedule the execution of the passed kernel if possible.
- *
- * @param[in] kernel Kernel to execute.
- * @param[in] tensors Map containing the tensors to operate on.
- * @param[in] exec_desc Execution descriptor
- * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
- */
- void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc, bool flush = true);
-
-#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
-
/** Initialises the context and command queue to be used by the scheduler.
*
* @param[in] context A CL context.
@@ -214,10 +187,6 @@ private:
*/
void flush_queue(bool flush);
-#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
- void enqueue_common(ICLKernel &kernel, ITensorPack &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc, bool flush);
-#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
-
/** Flag to ensure symbols initialisation is happening before Scheduler creation */
static std::once_flag _initialize_symbols;
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index 88933fc2d8..93aa45adc1 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -124,10 +124,6 @@ public:
void tune_kernel_static(ICLKernel &kernel) override;
void tune_kernel_dynamic(ICLKernel &kernel) override;
void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override;
-#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
- void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc) override;
-#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
-
/** Is the kernel_event set ?
*
* @return true if the kernel_event is set.
diff --git a/arm_compute/runtime/CL/ICLTuner.h b/arm_compute/runtime/CL/ICLTuner.h
index e0ee3ffe71..fa7a1424b8 100644
--- a/arm_compute/runtime/CL/ICLTuner.h
+++ b/arm_compute/runtime/CL/ICLTuner.h
@@ -30,15 +30,6 @@ namespace arm_compute
{
class ICLKernel;
-#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClExecutionDescriptor;
-} // namespace dynamic_fusion
-} // namespace experimental
-#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
/** Basic interface for tuning the OpenCL kernels */
class ICLTuner
{
@@ -66,15 +57,6 @@ public:
* @param[in, out] tensors Tensors for the kernel to use
*/
virtual void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) = 0;
-#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
- /** Tune OpenCL kernel dynamically for dynamic fusion interface
- *
- * @param[in] kernel Kernel to tune
- * @param[in, out] tensors Tensors for the kernel to use
- * @param[in] exec_desc Execution descriptor
- */
- virtual void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc) = 0;
-#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLTUNER_H */
diff --git a/arm_compute/runtime/experimental/ClCompositeOperator.h b/arm_compute/runtime/experimental/ClCompositeOperator.h
deleted file mode 100644
index 827629bd82..0000000000
--- a/arm_compute/runtime/experimental/ClCompositeOperator.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMIC_FUSION_CLCOMPOSITEOPERATOR_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMIC_FUSION_CLCOMPOSITEOPERATOR_H
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IOperator.h"
-
-#include "arm_compute/core/experimental/ClWorkload.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Map OpTensor handles to their corresponding ITensor memory
- */
-using OpTensorBinding = std::map<OpTensor, ITensor *>;
-
-/** Map a kernel (as identified by its unit workload id) to its corresponding tensor pack
- *
- * @note External user should not use the add_tensor_pack method to alter this tensor pack map, and should only use the map returned by @ref bind_tensors
- */
-class TensorPackMap
-{
-public:
- /** Find a tensor pack associated with the unit workload Id @p uwk_id
- *
- * @param[in] uwk_id unit workload Id associated with the tensor pack
- *
- * @return ITensorPack*
- */
- ITensorPack *find_tensor_pack(UnitWorkload::Id uwk_id);
- /** Get a tensor pack associated with @p uwk_id. Throws an exception if it cannot be found.
- *
- * @param[in] uwk_id unit workload Id associated with the tensor pack
- *
- * @return ITensorPack&
- */
- ITensorPack &get_tensor_pack(UnitWorkload::Id uwk_id);
- /** Add a tensor pack and associate it with unit workload Id @p uwk_id
- * @note Should not be used by external user
- *
- * @param[in] uwk_id unit workload Id associated with the tensor pack
- * @param[in] tensor_pack Tensor Pack to be added
- */
- void add_tensor_pack(UnitWorkload::Id uwk_id, const ITensorPack &tensor_pack);
-
-private:
- std::map<UnitWorkload::Id, ITensorPack> _tensor_packs{};
-};
-
-/** Holder of any auxiliary CLTensors required by a ClWorkload.
- *
- * @note The tensors are not allocated by default, and require the user to explicitly allocate them using the TensorInfo and AuxMemoryInfo
- *
- * @note This data holder must remain valid until the ClCompositeOperator that it's passed to is out of scope
- *
- * @note External user should not use the add_aux_tensor method, and should only use the data returned by @ref bind_tensors
- */
-class ClAuxTensorData
-{
-public:
- /** A view of a single auxiliary data and the associated TensorInfo and AuxMemoryInfo
- */
- struct DataView
- {
- DataView() = default;
- DataView(CLTensor *tensor, const TensorInfo &tensor_info, const AuxMemoryInfo &memory_info)
- : tensor{ tensor }, tensor_info{ tensor_info }, memory_info{ memory_info }
- {
- }
- ~DataView() = default;
- DataView(const DataView &other) = default;
- DataView &operator=(const DataView &other) = default;
- DataView(DataView &&other) = default;
- DataView &operator=(DataView &&other) = default;
- CLTensor *tensor{}; /**< Pointer to the auxiliary tensor */
- TensorInfo tensor_info{}; /**< Associated TensorInfo */
- AuxMemoryInfo memory_info{}; /**< Memory requirement */
- };
-
- /** Add auxiliary tensor.
- *
- * @note Should not be used by external user
- *
- * @param[in] tensor_id Any Id that can uniquely identify an auxiliary tensor. Usually ClWorkloadTensor Id
- * @param[in] tensor_info TensorInfo associated with the tensor
- * @param[in] memory_info Memory requirements
- *
- * @return CLTensor* if successfully added, otherwise nullptr
- */
- CLTensor *add_aux_tensor(int tensor_id, const ITensorInfo &tensor_info, const AuxMemoryInfo &memory_info);
-
- /** Get views of all auxiliary tensors. This is mainly used for allocating the auxiliary tensors.
- *
- * @return std::vector<DataView>&
- */
- std::vector<DataView> &get_tensors();
-
-private:
- std::map<int, std::unique_ptr<CLTensor>> _owned_tensors{};
- std::vector<DataView> _tensors{};
-};
-
-/** Bind tensor memory to packs used by prepare and run methods. Create auxiliary tensor objects and their memory requirements if needed
- *
- * @note This is the only method for external user to create ClAuxTensorData, and the prepare and run TensorPackMaps
- *
- * @param[out] aux_tensor_data Auxiliary Tensors required by the workload
- * @param[out] prepare_pack_map TensorPackMap used by the prepare method
- * @param[out] run_pack_map TensorPackMap used by the run method
- * @param[in] workload ClWorkload to bind the tensors to
- * @param[in] op_tensors CLTensor memory objects mapped from Core OpTensors
- *
- * @return Status
- */
-Status bind_tensors(ClAuxTensorData &aux_tensor_data, TensorPackMap &prepare_pack_map, TensorPackMap &run_pack_map, const ClWorkload &workload, const OpTensorBinding &op_tensors);
-
-/** Operator runtime to run a @ref ClWorkload
- *
- * @note User must explicitly call prepare before run otherwise run will fail.
- *
- */
-class ClCompositeOperator
-{
-public:
- ClCompositeOperator();
- ~ClCompositeOperator();
- /** Configures a @ref ClCompositeOperator with a @ref ClWorkload
- * This includes the compilation of Cl kernels inside the @ref ClWorkload
- *
- * @param[in] ctx CLCompileContext
- * @param[in] workload ClWorkload to configure with
- */
- void configure(const CLCompileContext &ctx, const ClWorkload &workload);
- /** Validate ClWorkload @p workload
- *
- * @param[in] workload ClWorkload to be validated
- *
- * @return Status
- */
- static Status validate(const ClWorkload &workload);
- /** Enqueue prepare workloads
- *
- * @param tensor_pack_map Tensors required by the prepare workloads
- */
- void prepare(TensorPackMap &tensor_pack_map);
- /** Enqueue run workloads
- *
- * @param tensor_pack_map Tensors required by the run workloads
- */
- void run(TensorPackMap &tensor_pack_map);
-
-private:
- struct Implementation;
- std::unique_ptr<Implementation> _impl;
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMIC_FUSION_CLCOMPOSITEOPERATOR_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file