diff options
author | SiCong Li <sicong.li@arm.com> | 2022-01-28 18:24:39 +0000 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2022-05-06 15:01:45 +0000 |
commit | b63b1196adea8b07dd8db77c2492a212650deba0 (patch) | |
tree | b264035197873f56c69784bec68cad7041b5d423 /src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h | |
parent | 3bb72b69566f18ad5c9446d318d2fc2b5f6dba42 (diff) | |
download | ComputeLibrary-b63b1196adea8b07dd8db77c2492a212650deba0.tar.gz |
Integrate Dynamic Fusion patches
* Add public interfaces:
    * OperatorGraph: Describe a workload that could contain fused kernels
    * IWorkload: Generic interface for workloads built from OperatorGraph
    * ClWorkload: OpenCL workloads built from OperatorGraph
    * ClCompositeOperator: Runtime async operator to execute a ClWorkload
    * DependencyGraph (will likely be deprecated in later iterations)
* Add example
    * cl_fused_conv2d_elementwise_add.cpp to explain how to use the new
      interfaces
* Add internal translation layer
* Refactor ClKernelBuildingAPI
    * Remove non-tile based gemm native kernel component
    * Minor interface changes
* Add integration tests
Resolves COMPMID-5161
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: Ib987ed79289ab0bcbd3130d54f5793408d9f1240
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7510
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h')
-rw-r--r-- | src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h | 201 |
1 files changed, 27 insertions, 174 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h index 23629f47bc..3dccdd7351 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h @@ -21,13 +21,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) +#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION +#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION" +#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ #ifndef ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H #define ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/experimental/ClWorkload.h" +#include "arm_compute/core/experimental/DependencyGraph.h" +#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h" namespace arm_compute { @@ -35,46 +40,9 @@ namespace experimental { namespace dynamic_fusion { -using ArgumentID = int32_t; +using ArgumentID = DependencyGraph::Id; -static constexpr ArgumentID g_arg_placeholder = -1; - -/** Verbose and explicit way to enumerate all the tensor arguments variants used by - * all kernel implementations. 
This avoids any ambiguity in what kernel arguments are passed - */ -enum class TensorArgType : int -{ - Scalar, - - Vector, - - Image, - Image_Reinterpret_As_3D, - Image_Export_To_ClImage2D, - - Image_3D, // 3D Tensor represented as a 2D Image + stride_z - Image_3D_Export_To_ClImage2D, - - Tensor_3D, - Tensor_4D, - - Tensor_4D_t_Buffer, - Tensor_4D_t_Image -}; -/** Describes all the info required to add a kernel argument at run time */ -struct ClKernelArgRuntimeDescriptor -{ - ClKernelArgRuntimeDescriptor(int arg_id, TensorArgType type, bool slide_along_dimz = true) - : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz } - { - } - ~ClKernelArgRuntimeDescriptor() = default; - int arg_id{ g_arg_placeholder }; // Arg ID in the blueprint - TensorArgType tensor_arg_type{ TensorArgType::Image }; - bool slide_along_dimz{ true }; -}; - -using ClKernelArgList = std::vector<ClKernelArgRuntimeDescriptor>; +static constexpr ArgumentID g_arg_placeholder = DependencyGraph::empty_id(); /** Intermediate representation of the final, complete kernel source. 
*/ class ClKernelBlueprint @@ -93,145 +61,38 @@ public: }; ///// Kernel Components ///// - -/** Meta information about all Cl Kernel Components */ -struct ClKernelComponentDescriptor -{ - int32_t version{ 1 }; /**< Operator version */ -}; - -/** Component: Tensor Argument */ -struct ClTensorDescriptor -{ - ClTensorDescriptor(ITensorInfo *info) - : tensor_info(info) - { - } - - ITensorInfo *tensor_info; -}; - -Status add_tensor_argument(ClKernelBlueprint &, const ClTensorDescriptor &, ArgumentID &); -Status add_tensor_intermed(ClKernelBlueprint &, ArgumentID &); - -/** Component: Gemm Native */ -struct GemmNativeDescriptor -{ - float alpha{}; - float beta{}; - unsigned int m{}; - unsigned int n{}; - unsigned int k{}; - unsigned int depth_output_gemm3d{}; - bool reinterpret_input_as_3d{}; - bool broadcast_bias{}; - bool fp_mixed_precision{}; - bool has_pad_y{}; - int nmult_transpose1xW_width{}; - int mult_interleave4x4_height{}; - GEMMLHSMatrixInfo lhs_info{}; - GEMMRHSMatrixInfo rhs_info{}; - int32_t a_offset{}; - int32_t b_offset{}; -}; - -Status add_kcomp_gemm_native(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const GemmNativeDescriptor &, - ArgumentID lhs_id, ArgumentID rhs_id, ArgumentID bias_id, ArgumentID &dst_id); - /** Component: Eltwise Add */ -struct EltwiseAddDescriptor -{ - ConvertPolicy convert_policy{ ConvertPolicy::SATURATE }; -}; -Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const EltwiseAddDescriptor &, ArgumentID src0_id, +Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClEltwiseAddKernelDescriptor &, ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id); /** Component: Activation */ -struct ActivationDescriptor -{ -}; -Status add_kcomp_activation(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const ActivationDescriptor &, ArgumentID src_id, ArgumentID &dst_id); +Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, 
ArgumentID &dst_id); /** Component: Direct Convolution **/ -struct DirectConvolutionDescriptor -{ - PadStrideInfo pad_stride_info{}; -}; -Status add_kcomp_direct_conv(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const DirectConvolutionDescriptor &, - ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id); - -enum class ClippingStrategy -{ - TOP_LEFT, - TOP_RIGHT, - BOTTOM_LEFT, - BOTTOM_RIGHT, -}; +Status add_kcomp_direct_conv2d(ClKernelBlueprint &, const ClDirectConv2dKernelDescriptor &, + ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id); -/** Component: Store */ -struct TileDescriptor -{ - Size2D tile_dims{}; - Size2D boundaries{}; - ClippingStrategy clipping{ ClippingStrategy::TOP_LEFT }; - - TileDescriptor() - { - } +Status add_kcomp_store(ClKernelBlueprint &, const StoreType &store_type, ArgumentID src_id, ArgumentID dst_id); - TileDescriptor(Size2D dims, const Size2D &bound, const ClippingStrategy &clip) - : tile_dims(dims), boundaries(bound), clipping(clip) - { - } - - bool empty() const - { - return (tile_dims.area() == 0) || (boundaries.area() == 0); - } -}; - -enum class StoreType -{ - VStore, - VStorePartial, - StoreRow, - ConvertStoreRow, - StoreBlock, - ConvertStoreBlock, - StoreRowPartial, - StoreBlockPartial, - StoreBlockBoundaryAware, - StoreVectorSelect, - TStoreIndirectWidthSelect -}; - -Status add_kcomp_store(ClKernelBlueprint &, const ClKernelComponentDescriptor &, ArgumentID src_id, ArgumentID dst_id, const StoreType &store_type); +Status add_tensor(ClKernelBlueprint &, ITensorInfo *, ArgumentID &, ArgumentID merge_point = DependencyGraph::empty_id()); ///// Kernel Components ///// ///// Building ///// -/** Information required for kernel compilation. 
The build results of KernelBlueprint */ -struct ClKernelCode -{ - std::string name{}; /**< Kernel name */ - std::string code{}; /**< Kernel source code */ - std::string config_id{}; /**< Generated from blueprint based on complex component */ - CLBuildOptions build_options{}; /**< Kernel build options */ - Window window{}; /**< Execution window */ - ClKernelArgList arguments{}; /**< Kernel argument specficiations */ - - bool operator==(const ClKernelCode &other) const - { - return name == other.name && code == other.code && build_options == other.build_options; - } -}; +/** Update existing merge tensor @p merge_point to point to @p t_id + * + * @param t_id + * @param merge_point + * @return Status + */ +Status update_merge_point(ClKernelBlueprint &, ArgumentID t_id, ArgumentID merge_point); -/** GPU information for building the @ref ClKernelCode */ -struct GpuInfo -{ - GPUTarget target{ GPUTarget::UNKNOWN }; -}; +/** Get dependency graph + * + * @return DependencyGraph + */ +DependencyGraph get_dependency_graph(const ClKernelBlueprint &blueprint); /** All information required for building the @ref ClKernelCode */ struct ClCodeBuilderContext @@ -247,12 +108,6 @@ Status build(ClKernelCode &code, const ClCodeBuilderContext &, ClKernelBlueprint ///// Building ///// ///// Tuning ///// -struct ClExecutionDescriptor -{ - cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */ - cl::NDRange gws{}; /**< Global work-group to be used */ - bool skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */ -}; Status tune_static(ClExecutionDescriptor &, const ClKernelCode &); @@ -261,6 +116,4 @@ Status tune_static(ClExecutionDescriptor &, const ClKernelCode &); } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif //ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H - -#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
\ No newline at end of file +#endif //ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H
\ No newline at end of file |