aboutsummaryrefslogtreecommitdiff
path: root/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h')
-rw-r--r--src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h201
1 files changed, 27 insertions, 174 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
index 23629f47bc..3dccdd7351 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
@@ -21,13 +21,18 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
+#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION"
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
#ifndef ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H
#define ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/experimental/ClWorkload.h"
+#include "arm_compute/core/experimental/DependencyGraph.h"
+#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h"
namespace arm_compute
{
@@ -35,46 +40,9 @@ namespace experimental
{
namespace dynamic_fusion
{
-using ArgumentID = int32_t;
+using ArgumentID = DependencyGraph::Id;
-static constexpr ArgumentID g_arg_placeholder = -1;
-
-/** Verbose and explicit way to enumerate all the tensor arguments variants used by
- * all kernel implementations. This avoids any ambiguity in what kernel arguments are passed
- */
-enum class TensorArgType : int
-{
- Scalar,
-
- Vector,
-
- Image,
- Image_Reinterpret_As_3D,
- Image_Export_To_ClImage2D,
-
- Image_3D, // 3D Tensor represented as a 2D Image + stride_z
- Image_3D_Export_To_ClImage2D,
-
- Tensor_3D,
- Tensor_4D,
-
- Tensor_4D_t_Buffer,
- Tensor_4D_t_Image
-};
-/** Describes all the info required to add a kernel argument at run time */
-struct ClKernelArgRuntimeDescriptor
-{
- ClKernelArgRuntimeDescriptor(int arg_id, TensorArgType type, bool slide_along_dimz = true)
- : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz }
- {
- }
- ~ClKernelArgRuntimeDescriptor() = default;
- int arg_id{ g_arg_placeholder }; // Arg ID in the blueprint
- TensorArgType tensor_arg_type{ TensorArgType::Image };
- bool slide_along_dimz{ true };
-};
-
-using ClKernelArgList = std::vector<ClKernelArgRuntimeDescriptor>;
+static constexpr ArgumentID g_arg_placeholder = DependencyGraph::empty_id();
/** Intermediate representation of the final, complete kernel source. */
class ClKernelBlueprint
@@ -93,145 +61,38 @@ public:
};
///// Kernel Components /////
-
-/** Meta information about all Cl Kernel Components */
-struct ClKernelComponentDescriptor
-{
- int32_t version{ 1 }; /**< Operator version */
-};
-
-/** Component: Tensor Argument */
-struct ClTensorDescriptor
-{
- ClTensorDescriptor(ITensorInfo *info)
- : tensor_info(info)
- {
- }
-
- ITensorInfo *tensor_info;
-};
-
-Status add_tensor_argument(ClKernelBlueprint &, const ClTensorDescriptor &, ArgumentID &);
-Status add_tensor_intermed(ClKernelBlueprint &, ArgumentID &);
-
-/** Component: Gemm Native */
-struct GemmNativeDescriptor
-{
- float alpha{};
- float beta{};
- unsigned int m{};
- unsigned int n{};
- unsigned int k{};
- unsigned int depth_output_gemm3d{};
- bool reinterpret_input_as_3d{};
- bool broadcast_bias{};
- bool fp_mixed_precision{};
- bool has_pad_y{};
- int nmult_transpose1xW_width{};
- int mult_interleave4x4_height{};
- GEMMLHSMatrixInfo lhs_info{};
- GEMMRHSMatrixInfo rhs_info{};
- int32_t a_offset{};
- int32_t b_offset{};
-};
-
-Status add_kcomp_gemm_native(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const GemmNativeDescriptor &,
- ArgumentID lhs_id, ArgumentID rhs_id, ArgumentID bias_id, ArgumentID &dst_id);
-
/** Component: Eltwise Add */
-struct EltwiseAddDescriptor
-{
- ConvertPolicy convert_policy{ ConvertPolicy::SATURATE };
-};
-Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const EltwiseAddDescriptor &, ArgumentID src0_id,
+Status add_kcomp_eltwise_add(ClKernelBlueprint &, const ClEltwiseAddKernelDescriptor &, ArgumentID src0_id,
ArgumentID src1_id, ArgumentID &dst_id);
/** Component: Activation */
-struct ActivationDescriptor
-{
-};
-Status add_kcomp_activation(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const ActivationDescriptor &, ArgumentID src_id, ArgumentID &dst_id);
+Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, ArgumentID &dst_id);
/** Component: Direct Convolution **/
-struct DirectConvolutionDescriptor
-{
- PadStrideInfo pad_stride_info{};
-};
-Status add_kcomp_direct_conv(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const DirectConvolutionDescriptor &,
- ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id);
-
-enum class ClippingStrategy
-{
- TOP_LEFT,
- TOP_RIGHT,
- BOTTOM_LEFT,
- BOTTOM_RIGHT,
-};
+Status add_kcomp_direct_conv2d(ClKernelBlueprint &, const ClDirectConv2dKernelDescriptor &,
+ ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id);
-/** Component: Store */
-struct TileDescriptor
-{
- Size2D tile_dims{};
- Size2D boundaries{};
- ClippingStrategy clipping{ ClippingStrategy::TOP_LEFT };
-
- TileDescriptor()
- {
- }
+Status add_kcomp_store(ClKernelBlueprint &, const StoreType &store_type, ArgumentID src_id, ArgumentID dst_id);
- TileDescriptor(Size2D dims, const Size2D &bound, const ClippingStrategy &clip)
- : tile_dims(dims), boundaries(bound), clipping(clip)
- {
- }
-
- bool empty() const
- {
- return (tile_dims.area() == 0) || (boundaries.area() == 0);
- }
-};
-
-enum class StoreType
-{
- VStore,
- VStorePartial,
- StoreRow,
- ConvertStoreRow,
- StoreBlock,
- ConvertStoreBlock,
- StoreRowPartial,
- StoreBlockPartial,
- StoreBlockBoundaryAware,
- StoreVectorSelect,
- TStoreIndirectWidthSelect
-};
-
-Status add_kcomp_store(ClKernelBlueprint &, const ClKernelComponentDescriptor &, ArgumentID src_id, ArgumentID dst_id, const StoreType &store_type);
+Status add_tensor(ClKernelBlueprint &, ITensorInfo *, ArgumentID &, ArgumentID merge_point = DependencyGraph::empty_id());
///// Kernel Components /////
///// Building /////
-/** Information required for kernel compilation. The build results of KernelBlueprint */
-struct ClKernelCode
-{
- std::string name{}; /**< Kernel name */
- std::string code{}; /**< Kernel source code */
- std::string config_id{}; /**< Generated from blueprint based on complex component */
- CLBuildOptions build_options{}; /**< Kernel build options */
- Window window{}; /**< Execution window */
- ClKernelArgList arguments{}; /**< Kernel argument specficiations */
-
- bool operator==(const ClKernelCode &other) const
- {
- return name == other.name && code == other.code && build_options == other.build_options;
- }
-};
+/** Update existing merge tensor @p merge_point to point to @p t_id
+ *
+ * @param t_id
+ * @param merge_point
+ * @return Status
+ */
+Status update_merge_point(ClKernelBlueprint &, ArgumentID t_id, ArgumentID merge_point);
-/** GPU information for building the @ref ClKernelCode */
-struct GpuInfo
-{
- GPUTarget target{ GPUTarget::UNKNOWN };
-};
+/** Get dependency graph
+ *
+ * @return DependencyGraph
+ */
+DependencyGraph get_dependency_graph(const ClKernelBlueprint &blueprint);
/** All information required for building the @ref ClKernelCode */
struct ClCodeBuilderContext
@@ -247,12 +108,6 @@ Status build(ClKernelCode &code, const ClCodeBuilderContext &, ClKernelBlueprint
///// Building /////
///// Tuning /////
-struct ClExecutionDescriptor
-{
- cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */
- cl::NDRange gws{}; /**< Global work-group to be used */
- bool skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */
-};
Status tune_static(ClExecutionDescriptor &, const ClKernelCode &);
@@ -261,6 +116,4 @@ Status tune_static(ClExecutionDescriptor &, const ClKernelCode &);
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H
-
-#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) \ No newline at end of file
+#endif //ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H \ No newline at end of file