diff options
Diffstat (limited to 'arm_compute/core/experimental')
-rw-r--r-- | arm_compute/core/experimental/ClWorkload.h | 220 | ||||
-rw-r--r-- | arm_compute/core/experimental/DependencyGraph.h | 278 | ||||
-rw-r--r-- | arm_compute/core/experimental/IWorkload.h | 133 | ||||
-rw-r--r-- | arm_compute/core/experimental/OperatorGraph.h | 211 | ||||
-rw-r--r-- | arm_compute/core/experimental/Types.h | 28 |
5 files changed, 857 insertions, 13 deletions
diff --git a/arm_compute/core/experimental/ClWorkload.h b/arm_compute/core/experimental/ClWorkload.h new file mode 100644 index 0000000000..bcac08b9f7 --- /dev/null +++ b/arm_compute/core/experimental/ClWorkload.h @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION +#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION" +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ +#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H +#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H + +#include "arm_compute/core/CL/CLCompileContext.h" +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/Window.h" + +#include "arm_compute/core/experimental/IWorkload.h" +#include "arm_compute/core/experimental/OperatorGraph.h" + +#include <map> + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/** Verbose and explicit way to enumerate all the tensor argument variants used by + * all kernel implementations. This avoids any ambiguity in what kernel arguments are passed. + */ +enum class ClKernelTensorArgType : int +{ + Scalar, + + Vector, + + Image, + Image_Reinterpret_As_3D, + Image_Export_To_ClImage2D, + + Image_3D, // 3D Tensor represented as a 2D Image + stride_z + Image_3D_Export_To_ClImage2D, + + Tensor_3D, + Tensor_4D, + Tensor_4D_t_Buffer, + Tensor_4D_t_Image +}; + +/** Describes all the info required to add a kernel argument at run time + * + * @note This struct can later be expanded into a more concise and formal way to specify how to set up + * arguments for a kernel inside a @ref ClUnitWorkload + */ +struct ClKernelArgDescriptor +{ + ClKernelArgDescriptor() = default; + /** Construct a descriptor from its three fields + * + * @param[in] arg_id Arg ID in the blueprint + * @param[in] type Tensor argument type + * @param[in] slide_along_dimz Value stored in the slide_along_dimz member. Defaults to true + */ + ClKernelArgDescriptor(int arg_id, ClKernelTensorArgType type, bool slide_along_dimz = true) + : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz } + { + } + ~ClKernelArgDescriptor() = default; + /** Equality compares only tensor_arg_type and slide_along_dimz; arg_id does not take part in the comparison. + * NOTE(review): presumably intentional, so that descriptors differing only in arg_id compare equal - confirm. + */ + friend bool operator==(const ClKernelArgDescriptor &arg0, const ClKernelArgDescriptor &arg1) + { + return (arg0.tensor_arg_type == arg1.tensor_arg_type) && (arg0.slide_along_dimz == arg1.slide_along_dimz); + } + int arg_id{ -1 }; /**< Arg ID in the blueprint, -1 means empty / uninitialized */ + 
ClKernelTensorArgType tensor_arg_type{ ClKernelTensorArgType::Image }; /**< tensor argument type */ + bool slide_along_dimz{ true }; /**< @note slide_along_dimz will be moved out of this descriptor in later iterations */ +}; + +using ClKernelArgList = std::map<int, ClKernelArgDescriptor>; + +/** Descriptor containing information required to run a single ClWorkload + */ +struct ClExecutionDescriptor +{ + cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */ + cl::NDRange gws{}; /**< Global work-group to be used */ + bool skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */ +}; + +/** Contains kernel code to be compiled and run in a ClUnitWorkload + */ +struct ClKernelCode +{ + friend bool operator==(const ClKernelCode &code0, const ClKernelCode &code1) + { + return (code0.name == code1.name) && (code0.code == code1.code) && (code0.config_id == code1.config_id) && (code0.build_options == code1.build_options) && (code0.window == code1.window) + && (code0.arguments == code1.arguments); + } + std::string name{}; /**< Kernel name */ + std::string code{}; /**< Kernel source code */ + std::string config_id{}; /**< Generated from blueprint based on complex component */ + CLBuildOptions build_options{}; /**< Kernel build options */ + Window window{}; /**< Execution window */ + ClKernelArgList arguments{}; /**< Kernel argument descriptors. map key is kernel ArgumentID */ +}; + +/** A descriptor of ClWorkload Tensors. 
+ */ +struct ClWorkloadTensor : public WorkloadTensor +{ + ClWorkloadTensor() = default; + /** Construct from the base @ref WorkloadTensor fields plus the kernel argument descriptor + * + * @param[in] id Id of the workload tensor + * @param[in] info TensorInfo pointer stored in the base struct (the pointer is stored, not the pointee) + * @param[in] memory_type Memory type + * @param[in] memory_info Auxiliary memory information + * @param[in] kernel_arg Kernel argument descriptor stored in kernel_arg + */ + ClWorkloadTensor(Id id, ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg) + : WorkloadTensor{ id, info, memory_type, memory_info }, kernel_arg{ kernel_arg } + { + } + ClKernelArgDescriptor kernel_arg{}; /**< Kernel argument descriptor of this tensor */ + /** Equality over info, memory_info, memory_type and kernel_arg. The id is not compared, and info is compared + * as a pointer value (two tensors are equal only if they point at the same ITensorInfo object). + */ + friend bool operator==(const ClWorkloadTensor &t0, const ClWorkloadTensor &t1) + { + return t0.info == t1.info && t0.memory_info == t1.memory_info && t0.memory_type == t1.memory_type && t0.kernel_arg == t1.kernel_arg; + } +}; + +/** The basic atomic unit in a @ref ClWorkload. It contains exactly one kernel to run. + */ +struct ClUnitWorkload : public UnitWorkload +{ + ClUnitWorkload() = default; + /** Construct from the base @ref UnitWorkload fields plus the kernel code + * + * @param[in] id Id of the unit workload + * @param[in] stage Stage + * @param[in] code Kernel code, copied into code + */ + ClUnitWorkload(Id id, UnitWorkloadStage stage, const ClKernelCode &code) + : UnitWorkload{ id, stage }, code{ code } + { + } + /** Equality over stage and code. The id is not compared. */ + friend bool operator==(const ClUnitWorkload &uworkload0, const ClUnitWorkload &uworkload1) + { + return uworkload0.stage == uworkload1.stage && uworkload0.code == uworkload1.code; + } + ClKernelCode code{}; /**< Kernel code of this unit workload */ +}; + +/** GPU information for @ref ClWorkloadContext + */ +struct GpuInfo +{ + /** Two GpuInfo objects are equal when their target is equal */ + friend bool operator==(const GpuInfo &info0, const GpuInfo &info1) + { + return info0.target == info1.target; + } + GPUTarget target{ GPUTarget::UNKNOWN }; /**< GPU target, GPUTarget::UNKNOWN by default */ +}; + +/** Context (device capabilities, platform details) associated with a ClWorkload + * + * It is required for building the @ref ClKernelCode and could also be used by the runtime (e.g. 
schedulers) + */ +struct ClWorkloadContext +{ + /** Two contexts are equal when their gpu_info is equal */ + friend bool operator==(const ClWorkloadContext &ctx0, const ClWorkloadContext &ctx1) + { + return ctx0.gpu_info == ctx1.gpu_info; + } + GpuInfo gpu_info{}; /**< GPU information */ +}; + +/** Workload for Cl backend + */ +struct ClWorkload : public IWorkload +{ + /** Add a tensor to the workload + * + * Asks the dependency graph for a tensor id associated with @p merge_point. A new @ref ClWorkloadTensor is stored + * only if no tensor is stored under that id yet; otherwise the existing entry is kept and @p info, @p memory_type, + * @p memory_info and @p kernel_arg are ignored. + * + * @param[in] info TensorInfo pointer to associate with the tensor + * @param[in] memory_type Memory type + * @param[in] memory_info Auxiliary memory information + * @param[in] kernel_arg Kernel argument descriptor + * @param[in] merge_point Merge point forwarded to the graph's add_tensor + * + * @return Tid The id returned by the graph + */ + Tid add_workload_tensor(ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg, Tid merge_point) + { + Tid id = graph.add_tensor(merge_point); + if(tensors.find(id) == tensors.end()) + { + tensors[id] = ClWorkloadTensor(id, info, memory_type, memory_info, kernel_arg); + } + return id; + } + /** Add a unit workload + * + * Adds an operator with @p inputs and @p outputs to the dependency graph and stores a @ref ClUnitWorkload under the + * returned operator id, replacing any entry already stored under that id. + * NOTE(review): only the id half of the pair returned by add_operator is used; the Status half is not inspected here - confirm callers do not rely on it. + * + * @param[in] stage Stage of the unit workload + * @param[in] code Kernel code of the unit workload + * @param[in] inputs Input tensor ids + * @param[in] outputs Output tensor ids + * + * @return UnitWorkId The operator id returned by the graph + */ + UnitWorkId add_unit_workload(UnitWorkloadStage stage, const ClKernelCode &code, const std::vector<Tid> &inputs, const std::vector<Tid> &outputs) + { + auto op = graph.add_operator(inputs, outputs); + auto id = op.second; + unit_workloads[id] = ClUnitWorkload(id, stage, code); + return id; + } + /** Equality over graph, context, unit_workloads, tensors and op_tensor_id_lut. The status member is not compared. */ + friend bool operator==(const ClWorkload &workload0, const ClWorkload &workload1) + { + return std::make_tuple( + workload0.graph, workload0.context, workload0.unit_workloads, workload0.tensors, workload0.op_tensor_id_lut) + == std::make_tuple( + workload1.graph, workload1.context, workload1.unit_workloads, workload1.tensors, workload1.op_tensor_id_lut); + } + ClWorkloadContext context{}; /**< Workload context*/ + std::map<UnitWorkId, ClUnitWorkload> unit_workloads{}; /**< Unit workloads to run*/ + std::map<Tid, ClWorkloadTensor> tensors{}; /**< Workload tensors*/ + std::map<Tid, OpTensor::Id> op_tensor_id_lut{}; /**< Map from ClWorkloadTensor to SRC and DST Operator Tensors (no need to store "intermediate" Operator Tensors)*/ + Status status{}; /**< For compatibility with the IOperator validate method. Store if the workload is valid or not. */ +}; + +/** Build a @ref ClWorkload from an @ref OperatorGraph. 
+ * + * @param[out] workload Workload to be built + * @param[in] op_graph OperatorGraph the workload is built from + * @param[in] ctx Workload context used for the build + * @return Status + */ +Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx); + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute + +#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
\ No newline at end of file diff --git a/arm_compute/core/experimental/DependencyGraph.h b/arm_compute/core/experimental/DependencyGraph.h new file mode 100644 index 0000000000..794bf0e344 --- /dev/null +++ b/arm_compute/core/experimental/DependencyGraph.h @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION +#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION" +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ +#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H +#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H + +#include "arm_compute/core/Error.h" + +#include <algorithm> +#include <map> +#include <vector> + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/** Check whether @p v is an element of @p vec (linear search with std::find) + * + * @param[in] v Value to look for + * @param[in] vec Vector to search + * + * @return true if an element equal to @p v is found + * @return false otherwise, including when @p vec is empty + */ +template <typename T> +bool is_in(const T &v, const std::vector<T> &vec) +{ + return std::find(std::begin(vec), std::end(vec), v) != std::end(vec); +} + +/** The dependency graph of a workload, where the nodes are of 2 types: Tensor or Operator + * Represented as a doubly-linked adjacency list with the differentiation between source and destination + * + * A "Merge Tensor" is an external tensor associated with the tensor within the graph, and serves as a merge point + */ +class DependencyGraph +{ +public: + /** A serial Id allocator + * + */ + class SerialIdAllocator + { + public: + using Id = int; + /** Return the current counter value and then increment the counter; the first id handed out is 0 */ + Id alloc() + { + return _counter++; + } + /** Id value that denotes "empty" (-1); alloc() starts from 0 and counts upwards */ + constexpr static Id empty() + { + return -1; + } + + private: + Id _counter{ 0 }; + }; + using Id = SerialIdAllocator::Id; + /** Adjacency list + * + */ + using AdjList = std::map<Id, std::vector<Id>>; + + /** A pack of an operator together with its input and output tensors, used when traversing the graph in topological order + * + */ + struct OpPack + { + Id op{}; + std::vector<Id> inputs{}; + std::vector<Id> outputs{}; + friend bool operator==(const OpPack &opp0, const OpPack &opp1) + { + return std::make_tuple( + opp0.op, opp0.inputs, opp0.outputs) + == std::make_tuple( + opp1.op, opp1.inputs, opp1.outputs); + } + }; + +public: + /** The empty Id, same value as SerialIdAllocator::empty() */ + constexpr static Id empty_id() + { + return SerialIdAllocator::empty(); + } + + DependencyGraph() = default; + // Used in cases where two DependencyGraphs may want to share the same configuration of 
tensors + explicit DependencyGraph(const std::vector<Id> &imported_tensors); + // Testing only + DependencyGraph(const AdjList &adj_src_tensors, const AdjList &adj_dst_tensors, const AdjList &adj_src_ops, const AdjList &adj_dst_ops, std::map<Id, Id> merge_points = {}); + + /** Add a new tensor + * + * @param merge_tensor The external merge point associated with the tensor. Leave empty if not needed. + * @return Id The newly allocated tensor, or a previously added tensor associated with @p merge_tensor + */ + Id add_tensor(Id merge_tensor = empty_id()); + + void remove_tensor(Id tensor); + + /** Add a new operator + * + * @param inputs Input tensors to the operator + * @param outputs Output tensors to the operator + * @return std::pair<Status, DependencyGraph::Id> where id is the newly allocated operator + */ + std::pair<Status, DependencyGraph::Id> add_operator(const std::vector<Id> &inputs, const std::vector<Id> &outputs); + + void remove_operator(Id op); + /** Sort the graph in a topological order + * + * @return std::pair<Status, std::vector<OpPack>> + */ + std::pair<Status, std::vector<OpPack>> topological_sort() const; + + std::vector<Id> src_ops(Id op) const; + std::vector<Id> dst_ops(Id op) const; + + std::vector<Id> src_ops_from_tensor(Id tensor) const; + std::vector<Id> dst_ops_from_tensor(Id tensor) const; + /** Get the merge points object + * + * @return std::map<Id, Id> + */ + std::map<Id, Id> get_merge_points() const; + /** Get all root ops. 
Root ops can also be referred to as "src ops" of the whole graph + * + * @return std::vector<Id> + */ + std::vector<Id> get_root_ops() const; + /** Get all dst ops of the whole graph + * + * @return std::vector<Id> + */ + std::vector<Id> get_dst_ops() const; + + /** Get source tensors to an operator + * + * @param op + * @return std::vector<Id> + */ + std::vector<Id> src_tensors(Id op) const; + /** Get destination tensors to an operator + * + * @param op + * @return std::vector<Id> + */ + std::vector<Id> dst_tensors(Id op) const; + /** Get source tensors of the whole graph + * + * @return std::vector<Id> + */ + std::vector<Id> src_tensors() const; + /** Get destination tensors of the whole graph + * + * @return std::vector<Id> + */ + std::vector<Id> dst_tensors() const; + /** Get all operators + * + * @return std::vector<Id> + */ + std::vector<Id> all_ops() const; + /** Get all tensors + * + * @return std::vector<Id> + */ + std::vector<Id> all_tensors() const; + /** Number of operators + * + * @return unsigned int + */ + unsigned int number_of_ops() const; + /** Number of tensors + * + * @return unsigned int + */ + unsigned int number_of_tensors() const; + + /** Update @p merge_point to point to @p t_id + * + * @param t_id + * @param merge_point + */ + Status update_merge_point(Id t_id, Id merge_point); + + /** Strict equality comparison (all internal ids and order of insertion matter). 
+ * In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal + * + * + * @param g0 + * @param g1 + * @return true + * @return false + */ + friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1) + { + // Do not compare id allocators + return std::make_tuple( + g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops, g0._merge_to_internal) + == std::make_tuple( + g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops, g1._merge_to_internal); + } + void link_input(Id op, Id in_tensor); + void link_output(Id op, Id out_tensor); + /** Check if there's a path from @p src_tensor to @p dst_op + * + * @param src_tensor + * @param dst_op + * @return true + * @return false + */ + bool path_exists_from_tensor_to_op(Id src_tensor, Id dst_op) const; + /** Check if there's a path from @p src_op to @p dst_op + * + * @param src_op + * @param dst_op + * @return true + * @return false + */ + bool path_exists_from_op_to_op(Id src_op, Id dst_op) const; + /** Check if tensor is the src tensor of the entire graph + * + * @param tensor + * @return true + * @return false + */ + bool is_src_tensor(Id tensor) const; + /** Check if tensor is the dst tensor of the entire graph + * + * @param tensor + * @return true + * @return false + */ + bool is_dst_tensor(Id tensor) const; + +private: + Id insert_new_tensor(); + Id insert_new_op(); + bool tensor_exists(Id tensor) const; + bool operator_exists(Id op) const; + bool is_src_tensor_of(Id op, Id tensor) const; + bool is_dst_tensor_of(Id op, Id tensor) const; + bool are_connected(Id op, Id tensor) const; + +private: + AdjList _adj_src_tensors{}; + AdjList _adj_dst_tensors{}; + AdjList _adj_src_ops{}; + AdjList _adj_dst_ops{}; + std::map<Id, Id> _merge_to_internal{}; // From merge tensor to internal tensor + SerialIdAllocator _operator_id{}; + SerialIdAllocator _tensor_id{}; +}; + +} // namespace dynamic_fusion +} 
// namespace experimental +} // namespace arm_compute + +#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H
\ No newline at end of file diff --git a/arm_compute/core/experimental/IWorkload.h b/arm_compute/core/experimental/IWorkload.h new file mode 100644 index 0000000000..942dbb70bb --- /dev/null +++ b/arm_compute/core/experimental/IWorkload.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION +#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION" +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ +#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H +#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/experimental/Types.h" + +#include "arm_compute/core/experimental/DependencyGraph.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/** Describes when a Unit Workload is run. + * + */ +struct UnitWorkloadStage +{ + enum class Stage + { + Prepare, /**< Only run once at the beginning. */ + Run, /**< Run every time after the first time. */ + }; + Stage stage; + friend bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1) + { + return stage0.stage == stage1.stage; + } +}; +/** Type of memory used by a Workload Tensor + * + */ +enum class MemoryType +{ + Core = 0, /**< Core memory used by the Workload Tensor, e.g. for argument tensors */ + Auxiliary = 1, /**< Auxiliary memory required by the Workload Tensor, e.g. for temporary tensors */ +}; + +using AuxMemoryLifetime = MemoryLifetime; + +/** Memory Info for a @ref WorkloadTensor of Auxiliary memory type. 
This communicates to the user how much additional + * memory is required for auxiliary tensors + */ +struct AuxMemoryInfo +{ + AuxMemoryInfo() = default; + + /** Construct with a size and an alignment; lifetime keeps its default (Temporary) + * + * @param[in] size Total memory size in bytes + * @param[in] alignment Memory alignment in bytes. Defaults to 0 here, whereas a default-constructed object has alignment 64. + * NOTE(review): the two defaults differ - confirm this is intended. + */ + AuxMemoryInfo(size_t size, size_t alignment = 0) noexcept + : size(size), + alignment(alignment) + { + } + + /** Construct with an explicit lifetime, a size and an alignment + * + * @param[in] lifetime Memory lifetime + * @param[in] size Total memory size in bytes + * @param[in] alignment Memory alignment in bytes. Defaults to 0 here, whereas a default-constructed object has alignment 64 + */ + AuxMemoryInfo(AuxMemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept + : lifetime(lifetime), + size(size), + alignment(alignment) + { + } + /** Member-wise equality over lifetime, size and alignment */ + friend bool operator==(const AuxMemoryInfo &info0, const AuxMemoryInfo &info1) + { + return info0.lifetime == info1.lifetime && info0.size == info1.size && info0.alignment == info1.alignment; + } + + AuxMemoryLifetime lifetime{ AuxMemoryLifetime::Temporary }; /**< Memory lifetime*/ + size_t size{ 0 }; /**< Total memory size in bytes */ + size_t alignment{ 64 }; /**< Memory alignment in bytes */ +}; + +/** A descriptor for IWorkload Tensors. + */ +struct WorkloadTensor +{ + using Id = DependencyGraph::Id; + Id id{}; /**< Id of the workload tensor */ + ITensorInfo *info{}; /**< TensorInfo associated with the workload tensor */ + MemoryType memory_type{}; /**< Memory type */ + AuxMemoryInfo memory_info{}; /**< Auxiliary memory information. This can be ignored if the memory type is Core */ +}; +/** The basic atomic unit in an @ref IWorkload. It contains exactly one kernel to run. 
+ * + */ +struct UnitWorkload +{ + using Id = DependencyGraph::Id; + Id id{}; /**< Id of the unit workload */ + UnitWorkloadStage stage{}; /**< Stage */ +}; + +/** Run-time-agnostic, platform-specific graph that describes everything required to run a workload + * It can be configured into an Arm Compute Library runtime, integrated into the runtime of another framework, or integrated into the compilation flow + */ +struct IWorkload +{ + using UnitWorkId = UnitWorkload::Id; + using Tid = WorkloadTensor::Id; + IWorkload() = default; + virtual ~IWorkload() = default; + DependencyGraph graph{}; /**< Dependency graph of the workload tensors and the unit workloads */ +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H
\ No newline at end of file diff --git a/arm_compute/core/experimental/OperatorGraph.h b/arm_compute/core/experimental/OperatorGraph.h new file mode 100644 index 0000000000..621a719fe6 --- /dev/null +++ b/arm_compute/core/experimental/OperatorGraph.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION +#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION" +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ + +#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH +#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorInfo.h" + +#include <memory> + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/** Graph of operators to execute within a Workload. 
This is a purely descriptive construct. + */ +class OperatorGraph final +{ +public: + struct Implementation; + OperatorGraph(); + ~OperatorGraph(); + +public: + /** Access the implementation object; Implementation is only forward-declared in this header + * @return Implementation* + */ + Implementation *impl(); + /** Const access to the implementation object + * @return const Implementation* + */ + const Implementation *impl() const; + +private: + std::unique_ptr<Implementation> _impl; +}; + +/** Return the validity of @p op_graph, usually after performing an operation (e.g. add_tensor) on it + * + * @param[in] op_graph OperatorGraph to be validated + * + * @return Status + */ +Status validate(const OperatorGraph &op_graph); + +/** Operator Tensor Handle + * This can be either an argument tensor, or an intermediate tensor linking 2 @ref Operator s + */ +class OpTensor final +{ +public: + using Id = int; + /** Construct a handle from an id. The default argument is a value-initialized Id (0). + * The constructor is not explicit, so an Id converts implicitly to an OpTensor. + */ + OpTensor(Id id = {}); + /** Id of the OpTensor + * @return Id + */ + Id id() const; + +private: + Id _id{}; +}; + +/** Provide order of @ref OpTensor by checking if @p t0 is "lower than" @p t1 + * + * @param[in] t0 OpTensor + * @param[in] t1 OpTensor + * + * @return true if @p t0 is lower than @p t1 + * @return false otherwise + */ +bool operator<(const OpTensor &t0, const OpTensor &t1); + +/** Associate a TensorInfo with a newly created @ref OpTensor in the @p graph. 
+ * + * @note @p info needs to remain in scope and valid until the workload has finished building + * @note Can pass in an empty TensorInfo for a destination Tensor, in which case @p info will be inferred from the source tensors + * + * @param[in,out] graph OperatorGraph where the tensor is added + * @param[in,out] info TensorInfo to be associated. Taken by non-const reference; see the note above on inference for destination tensors + * + * @return OpTensor + */ +OpTensor add_tensor(OperatorGraph &graph, ITensorInfo &info); + +/** Operator Handle + * This can be used to further modify an existing operator + */ +class Operator final +{ +public: + using Id = int; + /** Construct a handle from an id. The default argument is a value-initialized Id (0). + * The constructor is not explicit, so an Id converts implicitly to an Operator. + */ + Operator(Id id = {}); + /** Id of the Operator + * @return Id + */ + Id id() const; + +private: + Id _id{}; +}; + +/** Provide order of @ref Operator by checking if @p op0 is "lower than" @p op1 + * + * @param[in] op0 Operator + * @param[in] op1 Operator + * + * @return true if @p op0 is lower than @p op1 + * @return false otherwise + */ +bool operator<(const Operator &op0, const Operator &op1); + +/** Padding information for 2D operations like Conv2d + */ +struct Padding2D +{ + Padding2D() = default; + /** Construct from the four per-side paddings, each in elements + * + * @param[in] left Padding across the width dimension on the left + * @param[in] right Padding across the width dimension on the right + * @param[in] top Padding across the height dimension on the top + * @param[in] bottom Padding across the height dimension on the bottom + */ + Padding2D(size_t left, size_t right, size_t top, size_t bottom) + : left(left), right(right), top(top), bottom(bottom) + { + } + size_t left = { 0 }; /**< Padding across the width dimension on the left, in elements. */ + size_t right = { 0 }; /**< Padding across the width dimension on the right, in elements. */ + size_t top = { 0 }; /**< Padding across the height dimension on the top, in elements. */ + size_t bottom = { 0 }; /**< Padding across the height dimension on the bottom, in elements. 
*/ +}; + +/** Descriptor for Conv2d operation + */ +struct Conv2dDescriptor +{ + /* TOSA compliant attribute parameters start */ + Padding2D pad{}; /**< Padding, value-initialized by default */ + Size2D stride{ 1U, 1U }; /**< Stride, initialized to { 1, 1 } */ + Size2D dilation{ 1U, 1U }; /**< Dilation, initialized to { 1, 1 } */ + /* TOSA compliant attribute parameters end */ + /* Non-TOSA compliant attribute parameters start */ + /* Non-TOSA compliant attribute parameters end */ +}; +/** Add op Conv2d to @p graph + * + * @param[in,out] graph OperatorGraph where the operator is added to + * @param[in] desc Operator descriptor + * @param[in] input Input OpTensor + * @param[in] weights Weights OpTensor + * @param[in] bias (Optional) bias OpTensor + * @param[in] dst Destination OpTensor + * + * @return Operator + */ +Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor bias, OpTensor dst); +/** Overload of add_op_conv2d that takes no bias OpTensor; the remaining parameters are as documented on the overload above + */ +Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor dst); +/** (Only for Debugging and Testing) Force a conv2d method + * + * @param[in,out] graph OperatorGraph where conv2d op is located + * @param[in] conv2d Conv2d Op + * @param[in] method Forced ConvolutionMethod + */ +void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMethod method); + +/** Descriptor for Addition operation + * + * Currently declares no attributes; only the placeholder section markers below. + */ +struct AddDescriptor +{ + /* TOSA compliant attribute parameters start */ + /* TOSA compliant attribute parameters end */ + /* Non-TOSA compliant attribute parameters start */ + /* Non-TOSA compliant attribute parameters end */ +}; +/** Add op Add to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s + * + * @param[in,out] graph OperatorGraph where the operator is added to + * @param[in] desc Operator descriptor + * @param[in] lhs Lhs OpTensor + * @param[in] rhs Rhs OpTensor + * @param[in] dst Destination OpTensor + * + * @return Operator + */ +Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, 
OpTensor lhs, OpTensor rhs, OpTensor dst); + +bool operator==(const OpTensor &t0, const OpTensor &t1); +bool operator==(const Padding2D &pad0, const Padding2D &pad1); +bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1); +bool operator==(const AddDescriptor &, const AddDescriptor &); + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH
\ No newline at end of file diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index c8755dc26c..1995ab045e 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,20 +41,22 @@ enum TensorType : int32_t ACL_SRC_DST = 0, // Src - ACL_SRC = 0, - ACL_SRC_0 = 0, - ACL_SRC_1 = 1, - ACL_SRC_2 = 2, - ACL_SRC_3 = 3, - ACL_SRC_4 = 4, - ACL_SRC_5 = 5, - ACL_SRC_6 = 6, + ACL_SRC = 0, + ACL_SRC_0 = 0, + ACL_SRC_1 = 1, + ACL_SRC_2 = 2, + ACL_SRC_3 = 3, + ACL_SRC_4 = 4, + ACL_SRC_5 = 5, + ACL_SRC_6 = 6, + ACL_SRC_END = 6, // Dst - ACL_DST = 30, - ACL_DST_0 = 30, - ACL_DST_1 = 31, - ACL_DST_2 = 32, + ACL_DST = 30, + ACL_DST_0 = 30, + ACL_DST_1 = 31, + ACL_DST_2 = 32, + ACL_DST_END = 32, // Aux ACL_INT = 50, |