From 232c45253a84c16fc70eae6406cac5f4048efaca Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Thu, 3 Mar 2022 10:09:01 +0000 Subject: Merge kernel prototype patch Resolves: COMPMID-5151 Signed-off-by: Giorgio Arena Change-Id: Ic4024d5cd4819fe917a1d49621f1866ae2e90a37 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7260 Tested-by: Arm Jenkins Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins --- .../dynamic_fusion/ClKernelBuildingAPI.h | 255 +++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h') diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h new file mode 100644 index 0000000000..15622c848d --- /dev/null +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) + +#ifndef ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H +#define ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H + +#include "arm_compute/core/CL/CLCompileContext.h" +#include "arm_compute/core/Window.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +using ArgumentID = int32_t; + +static constexpr ArgumentID g_arg_placeholder = -1; + +/** Verbose and explicit way to enumerate all the tensor arguments variants used by + * all kernel implementations. This avoids any ambiguity in what kernel arguments are passed + */ +enum class TensorArgType : int +{ + Scalar, + + Vector, + + Image, + Image_Reinterpret_As_3D, + Image_Export_To_ClImage2D, + + Image_3D, // 3D Tensor represented as a 2D Image + stride_z + Image_3D_Export_To_ClImage2D, + + Tensor_3D, + Tensor_4D +}; +/** Describes all the info required to add a kernel argument at run time */ +struct ClKernelArgRuntimeDescriptor +{ + ClKernelArgRuntimeDescriptor(int arg_id, TensorArgType type, bool slide_along_dimz = true) + : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz } + { + } + ~ClKernelArgRuntimeDescriptor() = default; + int arg_id{ g_arg_placeholder }; // Arg ID in the blueprint + TensorArgType tensor_arg_type{ TensorArgType::Image }; + bool slide_along_dimz{ true }; +}; + +using ClKernelArgList = std::vector; + +/** Intermediate representation of the final, complete kernel source. */ +class ClKernelBlueprint +{ +public: + ClKernelBlueprint(); + ~ClKernelBlueprint(); + +private: + struct Implementation; + std::unique_ptr _impl; + +public: + Implementation &impl(); + const Implementation &impl() const; +}; + +///// Kernel Components ///// + +/** Meta information about all Cl Kernel Components */ +struct ClKernelComponentDescriptor +{ + int32_t version{ 1 }; /**< Operator version */ +}; + +/** Component: Tensor Argument */ +struct ClTensorDescriptor +{ + ClTensorDescriptor(const ITensorInfo *info, unsigned int dim) + : tensor_info(info), slice_dim(dim) + { + } + + const ITensorInfo *tensor_info; + unsigned int slice_dim; +}; + +Status add_tensor_argument(ClKernelBlueprint &, const ClTensorDescriptor &, ArgumentID &); +Status add_tensor_intermed(ClKernelBlueprint &, ArgumentID &); + +/** Component: Gemm Native */ +struct GemmNativeDescriptor +{ + float alpha{}; + float beta{}; + unsigned int m{}; + unsigned int n{}; + unsigned int k{}; + unsigned int depth_output_gemm3d{}; + bool reinterpret_input_as_3d{}; + bool broadcast_bias{}; + bool fp_mixed_precision{}; + bool has_pad_y{}; + int nmult_transpose1xW_width{}; + int mult_interleave4x4_height{}; + GEMMLHSMatrixInfo lhs_info{}; + GEMMRHSMatrixInfo rhs_info{}; + int32_t a_offset{}; + int32_t b_offset{}; +}; + +Status add_kcomp_gemm_native(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const GemmNativeDescriptor &, ArgumentID input_id, + ArgumentID weights_id, ArgumentID bias_id, ArgumentID &dst_id); + +/** Component: Eltwise Add */ +struct EltwiseAddDescriptor +{ + ConvertPolicy convert_policy{ ConvertPolicy::SATURATE }; +}; +Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const EltwiseAddDescriptor &, ArgumentID src0_id, + ArgumentID src1_id, ArgumentID &dst_id); + +/** Component: Activation */ +struct ActivationDescriptor +{ +}; +Status add_kcomp_activation(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const ActivationDescriptor &, ArgumentID src_id, ArgumentID &dst_id); + +enum class ClippingStrategy +{ + TOP_LEFT, + TOP_RIGHT, + BOTTOM_LEFT, + BOTTOM_RIGHT, +}; + +/** Component: Store */ +struct TileDescriptor +{ + Size2D tile_dims{}; + Size2D boundaries{}; + ClippingStrategy clipping{ ClippingStrategy::TOP_LEFT }; + + TileDescriptor() + { + } + + TileDescriptor(Size2D dims, const Size2D &bound, const ClippingStrategy &clip) + : tile_dims(dims), boundaries(bound), clipping(clip) + { + } + + bool empty() const + { + return (tile_dims.area() == 0) || (boundaries.area() == 0); + } +}; + +enum class StoreType +{ + VStore, + VStorePartial, + StoreRow, + ConvertStoreRow, + StoreBlock, + ConvertStoreBlock, + StoreRowPartial, + StoreBlockPartial, + StoreBlockBoundaryAware, + StoreVectorSelect, + TStoreIndirectWidthSelect +}; + +Status add_kcomp_store(ClKernelBlueprint &, const ClKernelComponentDescriptor &, ArgumentID src_id, ArgumentID dst_id, const StoreType &store_type); + +///// Kernel Components ///// + +///// Building ///// + +/** Information required for kernel compilation. The build results of KernelBlueprint */ +struct ClKernelCode +{ + std::string name{}; /**< Kernel name */ + std::string code{}; /**< Kernel source code */ + std::string config_id{}; /**< Generated from blueprint based on complex component */ + CLBuildOptions build_options{}; /**< Kernel build options */ + Window window{}; /**< Execution window */ + ClKernelArgList arguments{}; /**< Kernel argument specficiations */ + + bool operator==(const ClKernelCode &other) const + { + return name == other.name && code == other.code && build_options == other.build_options; + } +}; + +/** GPU information for building the @ref ClKernelCode */ +struct GpuInfo +{ + GPUTarget target{ GPUTarget::UNKNOWN }; +}; + +/** All information required for building the @ref ClKernelCode */ +struct ClCodeBuilderContext +{ + GpuInfo gpu_info{}; +}; + +Status set_tile_info(ClKernelBlueprint &, const TileDescriptor &); + +/** Build final kernel source from KernelBlueprint */ +Status build(ClKernelCode &code, const ClCodeBuilderContext &, ClKernelBlueprint &); + +///// Building ///// + +///// Tuning ///// +struct ClExecutionDescriptor +{ + cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */ + cl::NDRange gws{}; /**< Global work-group to be used */ +}; + +Status tune_static(ClExecutionDescriptor &, const ClKernelCode &); + +///// Tuning ///// + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif //ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H + +#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) \ No newline at end of file -- cgit v1.2.1