diff options
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h')
-rw-r--r-- | src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h | 25 |
1 files changed, 18 insertions, 7 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h index 27ab294cc9..23629f47bc 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h @@ -56,7 +56,10 @@ enum class TensorArgType : int Image_3D_Export_To_ClImage2D, Tensor_3D, - Tensor_4D + Tensor_4D, + + Tensor_4D_t_Buffer, + Tensor_4D_t_Image }; /** Describes all the info required to add a kernel argument at run time */ struct ClKernelArgRuntimeDescriptor @@ -100,13 +103,12 @@ struct ClKernelComponentDescriptor /** Component: Tensor Argument */ struct ClTensorDescriptor { - ClTensorDescriptor(ITensorInfo *info, unsigned int dim) - : tensor_info(info), slice_dim(dim) + ClTensorDescriptor(ITensorInfo *info) + : tensor_info(info) { } ITensorInfo *tensor_info; - unsigned int slice_dim; }; Status add_tensor_argument(ClKernelBlueprint &, const ClTensorDescriptor &, ArgumentID &); @@ -134,7 +136,7 @@ struct GemmNativeDescriptor }; Status add_kcomp_gemm_native(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const GemmNativeDescriptor &, - ArgumentID input_id, ArgumentID weights_id, ArgumentID bias_id, ArgumentID &dst_id); + ArgumentID lhs_id, ArgumentID rhs_id, ArgumentID bias_id, ArgumentID &dst_id); /** Component: Eltwise Add */ struct EltwiseAddDescriptor @@ -150,6 +152,14 @@ struct ActivationDescriptor }; Status add_kcomp_activation(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const ActivationDescriptor &, ArgumentID src_id, ArgumentID &dst_id); +/** Component: Direct Convolution **/ +struct DirectConvolutionDescriptor +{ + PadStrideInfo pad_stride_info{}; +}; +Status add_kcomp_direct_conv(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const DirectConvolutionDescriptor &, + ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id); + enum class ClippingStrategy { TOP_LEFT, @@ -239,8 +249,9 @@ Status build(ClKernelCode &code, const ClCodeBuilderContext &, ClKernelBlueprint ///// Tuning ///// struct ClExecutionDescriptor { - cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */ - cl::NDRange gws{}; /**< Global work-group to be used */ + cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */ + cl::NDRange gws{}; /**< Global work-group to be used */ + bool skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */ }; Status tune_static(ClExecutionDescriptor &, const ClKernelCode &); |