From b63b1196adea8b07dd8db77c2492a212650deba0 Mon Sep 17 00:00:00 2001
From: SiCong Li
Date: Fri, 28 Jan 2022 18:24:39 +0000
Subject: Integrate Dynamic Fusion patches

* Add public interfaces:
  * OperatorGraph: Describe a workload that could contain fused kernels
  * IWorkload: Generic interface for workloads built from OperatorGraph
  * ClWorkload: OpenCL workloads built from OperatorGraph
  * ClCompositeOperator: Runtime async operator to execute a ClWorkload
  * DependencyGraph (will likely be deprecated in later iterations)
* Add example
  * cl_fused_conv2d_elementwise_add.cpp to explain how to use the new interfaces
* Add internal translation layer
  * Refactor ClKernelBuildingAPI
  * Remove non-tile based gemm native kernel component
  * Minor interface changes
* Add integration tests

Resolves COMPMID-5161

Signed-off-by: SiCong Li
Change-Id: Ib987ed79289ab0bcbd3130d54f5793408d9f1240
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7510
Reviewed-by: Gian Marco Iodice
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 arm_compute/core/TensorInfo.h                   |  18 +-
 arm_compute/core/Types.h                        |  17 +-
 arm_compute/core/Window.h                       |  21 +-
 arm_compute/core/Window.inl                     |   7 +-
 arm_compute/core/experimental/ClWorkload.h      | 220 +++++++++++++++++++
 arm_compute/core/experimental/DependencyGraph.h | 278 ++++++++++++++++++++++++
 arm_compute/core/experimental/IWorkload.h       | 133 ++++++++++++
 arm_compute/core/experimental/OperatorGraph.h   | 211 ++++++++++++++++++
 arm_compute/core/experimental/Types.h           |  28 +--
 9 files changed, 915 insertions(+), 18 deletions(-)
 create mode 100644 arm_compute/core/experimental/ClWorkload.h
 create mode 100644 arm_compute/core/experimental/DependencyGraph.h
 create mode 100644 arm_compute/core/experimental/IWorkload.h
 create mode 100644 arm_compute/core/experimental/OperatorGraph.h

diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index 9bc86806fb..40f9ed9806 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -297,6 +297,7 @@ public:
     {
         _are_values_constant = are_values_constant;
         return *this;
     }
+    inline friend bool operator==(const TensorInfo &lhs, const TensorInfo &rhs);
 
 private:
     /** Calculates strides, offset and total size resulting from the specified padding around the XY plane.
@@ -320,5 +321,20 @@ private:
     DataLayout _data_layout;
     bool       _are_values_constant;
 };
+
+/** Check whether two tensor info are equal.
+ *
+ * @param[in] lhs LHS tensor info.
+ * @param[in] rhs RHS tensor info.
+ *
+ * @return True if the given tensor infos are the same.
+ */
+inline bool operator==(const TensorInfo &lhs, const TensorInfo &rhs)
+{
+    return (lhs._total_size == rhs._total_size) && (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) && (lhs._strides_in_bytes == rhs._strides_in_bytes)
+           && (lhs._num_channels == rhs._num_channels) && (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) && (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format)
+           && (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) && (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info)
+           && (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant);
+}
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_TENSORINFO_H */
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 1548816e91..7ae6a7e67e 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -253,9 +253,22 @@ struct ValidRegion
         return *this;
     }
 
+    /** Check whether two valid regions are equal.
+     *
+     * @param[in] lhs LHS valid region
+     * @param[in] rhs RHS valid region
+     *
+     * @return True if the valid regions are the same.
+     */
+    inline friend bool operator==(const ValidRegion &lhs, const ValidRegion &rhs);
+
     Coordinates anchor; /**< Anchor for the start of the valid region. */
     TensorShape shape;  /**< Shape of the valid region. */
 };
+inline bool operator==(const ValidRegion &lhs, const ValidRegion &rhs)
+{
+    return (lhs.anchor == rhs.anchor) && (lhs.shape == rhs.shape);
+}
 
 /** Methods available to handle borders */
 enum class BorderMode
@@ -346,7 +359,7 @@ struct BorderSize
      *
      * @return true if they are equal
      */
-    bool operator==(const BorderSize &rhs)
+    bool operator==(const BorderSize &rhs) const
     {
         return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left);
     }
@@ -357,7 +370,7 @@ struct BorderSize
      *
      * @return true if they are different
      */
-    bool operator!=(const BorderSize &rhs)
+    bool operator!=(const BorderSize &rhs) const
     {
         return !(*this == rhs);
     }
diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h
index f603e6c148..c566cffa88 100644
--- a/arm_compute/core/Window.h
+++ b/arm_compute/core/Window.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2020, 2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -123,6 +123,17 @@ public:
         {
             _end = end;
         }
+        /** Check whether two Dimensions are equal.
+         *
+         * @param[in] lhs LHS Dimensions
+         * @param[in] rhs RHS Dimensions
+         *
+         * @return True if the Dimensions are the same.
+         */
+        friend bool operator==(const Dimension &lhs, const Dimension &rhs)
+        {
+            return (lhs._start == rhs._start) && (lhs._end == rhs._end) && (lhs._step == rhs._step);
+        }
 
     private:
         int _start; /**< Start of the dimension */
@@ -414,6 +425,14 @@ public:
      * @param[in] rhs Second window to swap.
      */
     friend void swap(Window &lhs, Window &rhs);
+    /** Check whether two Windows are equal.
+     *
+     * @param[in] lhs LHS window
+     * @param[in] rhs RHS window
+     *
+     * @return True if the given windows are the same.
+     */
+    friend bool operator==(const Window &lhs, const Window &rhs);
 
 private:
     /** First slice of the window
diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl
index 6100d09a1c..5ee4b57145 100644
--- a/arm_compute/core/Window.inl
+++ b/arm_compute/core/Window.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2020, 2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -305,4 +305,9 @@ inline void swap(Window &lhs, Window &rhs)
 {
     lhs._dims.swap(rhs._dims);
 }
+
+inline bool operator==(const Window &lhs, const Window &rhs)
+{
+    return (lhs._dims == rhs._dims) && (lhs._is_broadcasted == rhs._is_broadcasted);
+}
 } // namespace arm_compute
diff --git a/arm_compute/core/experimental/ClWorkload.h b/arm_compute/core/experimental/ClWorkload.h
new file mode 100644
index 0000000000..bcac08b9f7
--- /dev/null
+++ b/arm_compute/core/experimental/ClWorkload.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION"
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
+#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
+#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/Window.h"
+
+#include "arm_compute/core/experimental/IWorkload.h"
+#include "arm_compute/core/experimental/OperatorGraph.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Verbose and explicit way to enumerate all the tensor arguments variants used by
+ *  all kernel implementations.
+ *  This avoids any ambiguity in what kernel arguments are passed
+ */
+enum class ClKernelTensorArgType : int
+{
+    Scalar,
+
+    Vector,
+
+    Image,
+    Image_Reinterpret_As_3D,
+    Image_Export_To_ClImage2D,
+
+    Image_3D, // 3D Tensor represented as a 2D Image + stride_z
+    Image_3D_Export_To_ClImage2D,
+
+    Tensor_3D,
+    Tensor_4D,
+    Tensor_4D_t_Buffer,
+    Tensor_4D_t_Image
+};
+
+/** Describes all the info required to add a kernel argument at run time
+ *
+ * @note This struct can later be expanded into a more concise and formal way to specify how to set up
+ * arguments for a kernel inside a @ref ClUnitWorkload
+ */
+struct ClKernelArgDescriptor
+{
+    ClKernelArgDescriptor() = default;
+    ClKernelArgDescriptor(int arg_id, ClKernelTensorArgType type, bool slide_along_dimz = true)
+        : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz }
+    {
+    }
+    ~ClKernelArgDescriptor() = default;
+    friend bool operator==(const ClKernelArgDescriptor &arg0, const ClKernelArgDescriptor &arg1)
+    {
+        return (arg0.tensor_arg_type == arg1.tensor_arg_type) && (arg0.slide_along_dimz == arg1.slide_along_dimz);
+    }
+    int                   arg_id{ -1 };                                    /**< Arg ID in the blueprint, -1 means empty / uninitialized */
+    ClKernelTensorArgType tensor_arg_type{ ClKernelTensorArgType::Image }; /**< tensor argument type */
+    bool                  slide_along_dimz{ true };                        /**< @note slide_along_dimz will be moved out of this descriptor in later iterations */
+};
+
+using ClKernelArgList = std::map<int, ClKernelArgDescriptor>;
+
+/** Descriptor containing information required to run a single ClWorkload
+ */
+struct ClExecutionDescriptor
+{
+    cl::NDRange suggested_lws{};              /**< Suggested local work-group size for optimal performance if not zero */
+    cl::NDRange gws{};                        /**< Global work-group to be used */
+    bool        skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */
+};
+
+/** Contains kernel code to be compiled and run in a ClUnitWorkload
+ */
+struct ClKernelCode
+{
+    friend bool operator==(const ClKernelCode &code0, const ClKernelCode &code1)
+    {
+        return (code0.name == code1.name) && (code0.code == code1.code) && (code0.config_id == code1.config_id) && (code0.build_options == code1.build_options) && (code0.window == code1.window)
+               && (code0.arguments == code1.arguments);
+    }
+    std::string     name{};          /**< Kernel name */
+    std::string     code{};          /**< Kernel source code */
+    std::string     config_id{};     /**< Generated from blueprint based on complex component */
+    CLBuildOptions  build_options{}; /**< Kernel build options */
+    Window          window{};        /**< Execution window */
+    ClKernelArgList arguments{};     /**< Kernel argument descriptors. map key is kernel ArgumentID */
+};
+
+/** A descriptor of ClWorkload Tensors.
+ */
+struct ClWorkloadTensor : public WorkloadTensor
+{
+    ClWorkloadTensor() = default;
+    ClWorkloadTensor(Id id, ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg)
+        : WorkloadTensor{ id, info, memory_type, memory_info }, kernel_arg{ kernel_arg }
+    {
+    }
+    ClKernelArgDescriptor kernel_arg{};
+    friend bool operator==(const ClWorkloadTensor &t0, const ClWorkloadTensor &t1)
+    {
+        return t0.info == t1.info && t0.memory_info == t1.memory_info && t0.memory_type == t1.memory_type && t0.kernel_arg == t1.kernel_arg;
+    }
+};
+
+/** The basic atomic unit in a @ref ClWorkload. It contains exactly one kernel to run.
+ */
+struct ClUnitWorkload : public UnitWorkload
+{
+    ClUnitWorkload() = default;
+    ClUnitWorkload(Id id, UnitWorkloadStage stage, const ClKernelCode &code)
+        : UnitWorkload{ id, stage }, code{ code }
+    {
+    }
+    friend bool operator==(const ClUnitWorkload &uworkload0, const ClUnitWorkload &uworkload1)
+    {
+        return uworkload0.stage == uworkload1.stage && uworkload0.code == uworkload1.code;
+    }
+    ClKernelCode code{};
+};
+
+/** GPU information for @ref ClWorkloadContext
+ */
+struct GpuInfo
+{
+    friend bool operator==(const GpuInfo &info0, const GpuInfo &info1)
+    {
+        return info0.target == info1.target;
+    }
+    GPUTarget target{ GPUTarget::UNKNOWN };
+};
+
+/** Context (device capabilities, platform details) associated with a ClWorkload
+ *
+ * It is required for building the @ref ClKernelCode and could also be used by the runtime (e.g. schedulers)
+ */
+struct ClWorkloadContext
+{
+    friend bool operator==(const ClWorkloadContext &ctx0, const ClWorkloadContext &ctx1)
+    {
+        return ctx0.gpu_info == ctx1.gpu_info;
+    }
+    GpuInfo gpu_info{};
+};
+
+/** Workload for Cl backend
+ */
+struct ClWorkload : public IWorkload
+{
+    Tid add_workload_tensor(ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg, Tid merge_point)
+    {
+        Tid id = graph.add_tensor(merge_point);
+        if(tensors.find(id) == tensors.end())
+        {
+            tensors[id] = ClWorkloadTensor(id, info, memory_type, memory_info, kernel_arg);
+        }
+        return id;
+    }
+    UnitWorkId add_unit_workload(UnitWorkloadStage stage, const ClKernelCode &code, const std::vector<Tid> &inputs, const std::vector<Tid> &outputs)
+    {
+        auto op = graph.add_operator(inputs, outputs);
+        auto id = op.second;
+        unit_workloads[id] = ClUnitWorkload(id, stage, code);
+        return id;
+    }
+    friend bool operator==(const ClWorkload &workload0, const ClWorkload &workload1)
+    {
+        return std::make_tuple(
+                   workload0.graph, workload0.context, workload0.unit_workloads, workload0.tensors, workload0.op_tensor_id_lut)
+               == std::make_tuple(
+                   workload1.graph, workload1.context, workload1.unit_workloads, workload1.tensors, workload1.op_tensor_id_lut);
+    }
+    ClWorkloadContext                    context{};          /**< Workload context */
+    std::map<UnitWorkId, ClUnitWorkload> unit_workloads{};   /**< Unit workloads to run */
+    std::map<Tid, ClWorkloadTensor>      tensors{};          /**< Workload tensors */
+    std::map<Tid, OpTensor::Id>          op_tensor_id_lut{}; /**< Map from ClWorkloadTensor to SRC and DST Operator Tensors (no need to store "intermediate" Operator Tensors) */
+    Status status{};                                         /**< For compatibility with the IOperator validate method. Store if the workload is valid or not. */
+};
+
+/** Build a @ref ClWorkload from an @ref OperatorGraph.
+ *
+ * @param[out] workload
+ * @param[in]  op_graph
+ * @param[in]  ctx
+ * @return Status
+ */
+Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx);
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
\ No newline at end of file
diff --git a/arm_compute/core/experimental/DependencyGraph.h b/arm_compute/core/experimental/DependencyGraph.h
new file mode 100644
index 0000000000..794bf0e344
--- /dev/null
+++ b/arm_compute/core/experimental/DependencyGraph.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION"
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
+#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H
+#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H
+
+#include "arm_compute/core/Error.h"
+
+#include <algorithm>
+#include <map>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+template <typename T>
+bool is_in(const T &v, const std::vector<T> &vec)
+{
+    return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
+}
+
+/** The dependency graph of a workload, where the nodes are of 2 types: Tensor or Operator
+ *  Represented as a doubly-linked adjacency list with the differentiation between source and destination
+ *
+ *  A "Merge Tensor" is an external tensor associated with a tensor within the graph, and serves as a merge point
+ */
+class DependencyGraph
+{
+public:
+    /** A serial Id allocator
+     *
+     */
+    class SerialIdAllocator
+    {
+    public:
+        using Id = int;
+        Id alloc()
+        {
+            return _counter++;
+        }
+        constexpr static Id empty()
+        {
+            return -1;
+        }
+
+    private:
+        Id _counter{ 0 };
+    };
+    using Id = SerialIdAllocator::Id;
+    /** Adjacency list
+     *
+     */
+    using AdjList = std::map<Id, std::vector<Id>>;
+
+    /** A pack of operator including its input and output tensors, used when traversing through the graph in topological order
+     *
+     */
+    struct OpPack
+    {
+        Id              op{};
+        std::vector<Id> inputs{};
+        std::vector<Id> outputs{};
+        friend bool operator==(const OpPack &opp0, const OpPack &opp1)
+        {
+            return std::make_tuple(
+                       opp0.op, opp0.inputs, opp0.outputs)
+                   == std::make_tuple(
+                       opp1.op, opp1.inputs, opp1.outputs);
+        }
+    };
+
+public:
+    constexpr static Id empty_id()
+    {
+        return SerialIdAllocator::empty();
+    }
+
+    DependencyGraph() = default;
+    // Used in cases where two DependencyGraphs may want to share the same configuration of tensors
+    explicit DependencyGraph(const std::vector<Id> &imported_tensors);
+    // Testing only
+    DependencyGraph(const AdjList &adj_src_tensors, const AdjList &adj_dst_tensors, const AdjList &adj_src_ops, const AdjList &adj_dst_ops, std::map<Id, Id> merge_points = {});
+
+    /** Add a new tensor
+     *
+     * @param merge_tensor The external merge point associated with the tensor. Leave empty if not needed.
+     * @return Id The newly allocated tensor, or a previously added tensor associated with @p merge_tensor
+     */
+    Id add_tensor(Id merge_tensor = empty_id());
+
+    void remove_tensor(Id tensor);
+
+    /** Add a new operator
+     *
+     * @param inputs  Input tensors to the operator
+     * @param outputs Output tensors to the operator
+     * @return std::pair<Status, Id> where the Id is the newly allocated operator
+     */
+    std::pair<Status, Id> add_operator(const std::vector<Id> &inputs, const std::vector<Id> &outputs);
+
+    void remove_operator(Id op);
+    /** Sort the graph in a topological order
+     *
+     * @return std::pair<Status, std::vector<OpPack>>
+     */
+    std::pair<Status, std::vector<OpPack>> topological_sort() const;
+
+    std::vector<Id> src_ops(Id op) const;
+    std::vector<Id> dst_ops(Id op) const;
+
+    std::vector<Id> src_ops_from_tensor(Id tensor) const;
+    std::vector<Id> dst_ops_from_tensor(Id tensor) const;
+    /** Get the merge points object
+     *
+     * @return std::map<Id, Id>
+     */
+    std::map<Id, Id> get_merge_points() const;
+    /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph
+     *
+     * @return std::vector<Id>
+     */
+    std::vector<Id> get_root_ops() const;
+    /** Get all dst ops of the whole graph
+     *
+     * @return std::vector<Id>
+     */
+    std::vector<Id> get_dst_ops() const;
+
+    /** Get source tensors to an operator
+     *
+     * @param op
+     * @return std::vector<Id>
+     */
+    std::vector<Id> src_tensors(Id op) const;
+    /** Get destination tensors to an operator
+     *
+     * @param op
+     * @return std::vector<Id>
+     */
+    std::vector<Id> dst_tensors(Id op) const;
+    /** Get source tensors of the whole graph
+     *
+     * @return std::vector<Id>
+     */
+    std::vector<Id> src_tensors() const;
+    /** Get destination tensors of the whole graph
+     *
+     * @return std::vector<Id>
+     */
+    std::vector<Id> dst_tensors() const;
+    /** Get all operators
+     *
+     * @return std::vector<Id>
+     */
+    std::vector<Id> all_ops() const;
+    /** Get all tensors
+     *
+     * @return std::vector<Id>
+     */
+    std::vector<Id> all_tensors() const;
+    /** Number of operators
+     *
+     * @return unsigned int
+     */
+    unsigned int number_of_ops() const;
+    /** Number of tensors
+     *
+     * @return unsigned int
+     */
+    unsigned int number_of_tensors() const;
+
+    /** Update @p merge_point to point to @p t_id
+     *
+     * @param t_id
+     * @param merge_point
+     */
+    Status update_merge_point(Id t_id, Id merge_point);
+
+    /** Strict equality comparison (all internal ids and order of insertion matter).
+     *  In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal
+     *
+     * @param g0
+     * @param g1
+     * @return true
+     * @return false
+     */
+    friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
+    {
+        // Do not compare id allocators
+        return std::make_tuple(
+                   g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops, g0._merge_to_internal)
+               == std::make_tuple(
+                   g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops, g1._merge_to_internal);
+    }
+    void link_input(Id op, Id in_tensor);
+    void link_output(Id op, Id out_tensor);
+    /** Check if there's a path from @p src_tensor to @p dst_op
+     *
+     * @param src_tensor
+     * @param dst_op
+     * @return true
+     * @return false
+     */
+    bool path_exists_from_tensor_to_op(Id src_tensor, Id dst_op) const;
+    /** Check if there's a path from @p src_op to @p dst_op
+     *
+     * @param src_op
+     * @param dst_op
+     * @return true
+     * @return false
+     */
+    bool path_exists_from_op_to_op(Id src_op, Id dst_op) const;
+    /** Check if tensor is the src tensor of the entire graph
+     *
+     * @param tensor
+     * @return true
+     * @return false
+     */
+    bool is_src_tensor(Id tensor) const;
+    /** Check if tensor is the dst tensor of the entire graph
+     *
+     * @param tensor
+     * @return true
+     * @return false
+     */
+    bool is_dst_tensor(Id tensor) const;
+
+private:
+    Id   insert_new_tensor();
+    Id   insert_new_op();
+    bool tensor_exists(Id tensor) const;
+    bool operator_exists(Id op) const;
+    bool is_src_tensor_of(Id op, Id tensor) const;
+    bool is_dst_tensor_of(Id op, Id tensor) const;
+    bool are_connected(Id op, Id tensor) const;
+
+private:
+    AdjList           _adj_src_tensors{};
+    AdjList           _adj_dst_tensors{};
+    AdjList           _adj_src_ops{};
+    AdjList           _adj_dst_ops{};
+    std::map<Id, Id>  _merge_to_internal{}; // From merge tensor to internal tensor
+    SerialIdAllocator _operator_id{};
+    SerialIdAllocator _tensor_id{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_DEPENDENCYGRAPH_H
\ No newline at end of file
diff --git a/arm_compute/core/experimental/IWorkload.h b/arm_compute/core/experimental/IWorkload.h
new file mode 100644
index 0000000000..942dbb70bb
--- /dev/null
+++ b/arm_compute/core/experimental/IWorkload.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION"
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
+#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H
+#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/experimental/Types.h"
+
+#include "arm_compute/core/experimental/DependencyGraph.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Describes when a Unit Workload is run.
+ *
+ */
+struct UnitWorkloadStage
+{
+    enum class Stage
+    {
+        Prepare, /**< Only run once at the beginning. */
+        Run,     /**< Run every time after the first time. */
+    };
+    Stage stage;
+    friend bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
+    {
+        return stage0.stage == stage1.stage;
+    }
+};
+/** Type of memory used by a Workload Tensor
+ *
+ */
+enum class MemoryType
+{
+    Core      = 0, /**< Core memory used by the Workload Tensor, e.g. for argument tensors */
+    Auxiliary = 1, /**< Auxiliary memory required by the Workload Tensor, e.g. for temporary tensors */
+};
+
+using AuxMemoryLifetime = MemoryLifetime;
+
+/** Memory Info for a @ref WorkloadTensor of Auxiliary memory type. This communicates to the user how much additional
+ *  memory is required for auxiliary tensors
+ */
+struct AuxMemoryInfo
+{
+    AuxMemoryInfo() = default;
+
+    AuxMemoryInfo(size_t size, size_t alignment = 0) noexcept
+        : size(size),
+          alignment(alignment)
+    {
+    }
+
+    AuxMemoryInfo(AuxMemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept
+        : lifetime(lifetime),
+          size(size),
+          alignment(alignment)
+    {
+    }
+    friend bool operator==(const AuxMemoryInfo &info0, const AuxMemoryInfo &info1)
+    {
+        return info0.lifetime == info1.lifetime && info0.size == info1.size && info0.alignment == info1.alignment;
+    }
+
+    AuxMemoryLifetime lifetime{ AuxMemoryLifetime::Temporary }; /**< Memory lifetime */
+    size_t            size{ 0 };                                /**< Total memory size in bytes */
+    size_t            alignment{ 64 };                          /**< Memory alignment in bytes */
+};
+
+/** A descriptor for IWorkload Tensors.
+ */
+struct WorkloadTensor
+{
+    using Id = DependencyGraph::Id;
+    Id            id{};          /**< Id of the workload tensor */
+    ITensorInfo  *info{};        /**< TensorInfo associated with the workload tensor */
+    MemoryType    memory_type{}; /**< Memory type */
+    AuxMemoryInfo memory_info{}; /**< Auxiliary memory information. This can be ignored if the memory type is Core */
+};
+/** The basic atomic unit in an @ref IWorkload. It contains exactly one kernel to run.
+ *
+ */
+struct UnitWorkload
+{
+    using Id = DependencyGraph::Id;
+    Id                id{};    /**< Id of the unit workload */
+    UnitWorkloadStage stage{}; /**< Stage */
+};
+
+/** Run-time-agnostic, platform-specific graph that describes everything required to run a workload
+ *  It can be configured into an Arm Compute Library runtime, integrated into the runtime of another framework, or integrated into the compilation flow
+ */
+struct IWorkload
+{
+    using UnitWorkId = UnitWorkload::Id;
+    using Tid        = WorkloadTensor::Id;
+    IWorkload()          = default;
+    virtual ~IWorkload() = default;
+    DependencyGraph graph{}; /**< Dependency graph of the workload tensors and the unit workloads */
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IWORKLOAD_H
\ No newline at end of file
diff --git a/arm_compute/core/experimental/OperatorGraph.h b/arm_compute/core/experimental/OperatorGraph.h
new file mode 100644
index 0000000000..621a719fe6
--- /dev/null
+++ b/arm_compute/core/experimental/OperatorGraph.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION"
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
+
+#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH
+#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/ITensorInfo.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Graph of operators to execute within a Workload. This is a pure descriptive construct.
+ */
+class OperatorGraph final
+{
+public:
+    struct Implementation;
+    OperatorGraph();
+    ~OperatorGraph();
+
+public:
+    Implementation       *impl();
+    const Implementation *impl() const;
+
+private:
+    std::unique_ptr<Implementation> _impl;
+};
+
+/** Return the validity of @p op_graph, usually after performing an operation (e.g. add_tensor) on it
+ *
+ * @param[in,out] op_graph OperatorGraph to be validated
+ *
+ * @return Status
+ */
+Status validate(const OperatorGraph &op_graph);
+
+/** Operator Tensor Handle
+ *  This can be either an argument tensor, or an intermediate tensor linking 2 @ref Operator s
+ */
+class OpTensor final
+{
+public:
+    using Id = int;
+    OpTensor(Id id = {});
+    /** Id of the OpTensor
+     * @return Id
+     */
+    Id id() const;
+
+private:
+    Id _id{};
+};
+
+/** Provide order of @ref OpTensor by checking if @p t0 is "lower than" @p t1
+ *
+ * @param[in] t0 OpTensor
+ * @param[in] t1 OpTensor
+ *
+ * @return true  if @p t0 is lower than @p t1
+ * @return false otherwise
+ */
+bool operator<(const OpTensor &t0, const OpTensor &t1);
+
+/** Associate a TensorInfo with a newly created @ref OpTensor in the @p graph.
+ *
+ * @note @p info needs to remain in scope and valid until the workload has finished building
+ * @note Can pass in an empty TensorInfo for a destination Tensor, in which case @p info will be inferred from the source tensors
+ *
+ * @param[in,out] graph OperatorGraph where the tensor is added
+ * @param[in]     info  TensorInfo to be associated
+ *
+ * @return OpTensor
+ */
+OpTensor add_tensor(OperatorGraph &graph, ITensorInfo &info);
+
+/** Operator Handle
+ *  This can be used to further modify an existing operator
+ */
+class Operator final
+{
+public:
+    using Id = int;
+    Operator(Id id = {});
+    /** Id of the Operator
+     * @return Id
+     */
+    Id id() const;
+
+private:
+    Id _id{};
+};
+
+/** Provide order of @ref Operator by checking if @p op0 is "lower than" @p op1
+ *
+ * @param[in] op0 Operator
+ * @param[in] op1 Operator
+ *
+ * @return true  if @p op0 is lower than @p op1
+ * @return false otherwise
+ */
+bool operator<(const Operator &op0, const Operator &op1);
+
+/** Padding information for 2D operations such as Conv2d
+ */
+struct Padding2D
+{
+    Padding2D() = default;
+    Padding2D(size_t left, size_t right, size_t top, size_t bottom)
+        : left(left), right(right), top(top), bottom(bottom)
+    {
+    }
+    size_t left   = { 0 }; /**< Padding across the width dimension on the left, in elements. */
+    size_t right  = { 0 }; /**< Padding across the width dimension on the right, in elements. */
+    size_t top    = { 0 }; /**< Padding across the height dimension on the top, in elements. */
+    size_t bottom = { 0 }; /**< Padding across the height dimension on the bottom, in elements. */
*/ +}; + +/** Descriptor for Conv2dDescriptor operation + */ +struct Conv2dDescriptor +{ + /* TOSA compliant attribute parameters start */ + Padding2D pad{}; + Size2D stride{ 1U, 1U }; + Size2D dilation{ 1U, 1U }; + /* TOSA compliant attribute parameters end */ + /* Non-TOSA compliant attribute parameters start */ + /* Non-TOSA compliant attribute parameters end */ +}; +/** Add op Conv2d to @p graph + * + * @param[in,out] graph OperatorGraph where the operator is added to + * @param[in] desc Operator descriptor + * @param[in] input Input OpTensor + * @param[in] weights Weights OpTensor + * @param[in] bias (Optional) bias OpTensor + * @param[in] dst Destination OpTensor + * + * @return Operator + */ +Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor bias, OpTensor dst); +Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor dst); +/** (Only for Debuging and Testing) Force a conv2d method + * + * @param[in,out] graph OperatorGraph where conv2d op is located + * @param[in] conv2d Conv2d Op + * @param[in] method Forced ConvolutionMethod + */ +void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMethod method); + +/** Descriptor for Addition operation + * + */ +struct AddDescriptor +{ + /* TOSA compliant attribute parameters start */ + /* TOSA compliant attribute parameters end */ + /* Non-TOSA compliant attribute parameters start */ + /* Non-TOSA compliant attribute parameters end */ +}; +/** Add op Add to @p graph, and optionally describes fusion through passing of intermediate @ref OpTensor s + * + * @param[in,out] graph OperatorGraph where the operator is added to + * @param[in] desc Operator descriptor + * @param[in] lhs Lhs OpTensor + * @param[in] rhs Rhs OpTensor + * @param[in] dst Destination OpTensor + * + * @return Operator + */ +Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst); + +bool operator==(const OpTensor &t0, const OpTensor &t1); +bool operator==(const Padding2D &pad0, const Padding2D &pad1); +bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1); +bool operator==(const AddDescriptor &, const AddDescriptor &); + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPH \ No newline at end of file diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index c8755dc26c..1995ab045e 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,20 +41,22 @@ enum TensorType : int32_t ACL_SRC_DST = 0, // Src - ACL_SRC = 0, - ACL_SRC_0 = 0, - ACL_SRC_1 = 1, - ACL_SRC_2 = 2, - ACL_SRC_3 = 3, - ACL_SRC_4 = 4, - ACL_SRC_5 = 5, - ACL_SRC_6 = 6, + ACL_SRC = 0, + ACL_SRC_0 = 0, + ACL_SRC_1 = 1, + ACL_SRC_2 = 2, + ACL_SRC_3 = 3, + ACL_SRC_4 = 4, + ACL_SRC_5 = 5, + ACL_SRC_6 = 6, + ACL_SRC_END = 6, // Dst - ACL_DST = 30, - ACL_DST_0 = 30, - ACL_DST_1 = 31, - ACL_DST_2 = 32, + ACL_DST = 30, + ACL_DST_0 = 30, + ACL_DST_1 = 31, + ACL_DST_2 = 32, + ACL_DST_END = 32, // Aux ACL_INT = 50, -- cgit v1.2.1