From 2b6ebfe4270b06b09e45f306e8384950aeca7e4e Mon Sep 17 00:00:00 2001 From: Ramy Elgammal Date: Thu, 9 Mar 2023 21:15:37 +0000 Subject: Implement OpenCL MatMul for Lhs NT Rhs T/NT FP32/16 - Implement ClNativeMatMulKernel class - Implement opencl kernel for LHS non-transposed and RHS non-transposed - Implement opencl kernel for LHS non-transposed and RHS transposed - Add test fixture and dataset for matmul - Implement transpose_tensor() for reference implementation to transpose high dimensional tensors Resolves: COMPMID-5944, COMPMID-5951 Co-authored-by: Gunes Bayir Co-authored-by: Ramy Elgammal Change-Id: I1d5b8978f41be27baddb3153ade880472141573f Signed-off-by: Gunes Bayir Signed-off-by: Ramy Elgammal Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9333 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Benchmark: Arm Jenkins --- utils/TypePrinter.h | 116 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 76 insertions(+), 40 deletions(-) (limited to 'utils') diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index db27ddccde..c3af0a2419 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -421,7 +421,8 @@ inline ::std::ostream &operator<<(::std::ostream &os, const GEMMLHSMatrixInfo &g */ inline ::std::ostream &operator<<(::std::ostream &os, const GEMMRHSMatrixInfo &gemm_info) { - os << "( n0=" << (unsigned int)gemm_info.n0 << " k0=" << gemm_info.k0 << " h0=" << gemm_info.h0 << " trans=" << gemm_info.transpose << " inter=" << gemm_info.interleave << " exp_img=" << gemm_info.export_to_cl_image << "})"; + os << "( n0=" << (unsigned int)gemm_info.n0 << " k0=" << gemm_info.k0 << " h0=" << gemm_info.h0 << " trans=" << gemm_info.transpose << " inter=" << gemm_info.interleave << " exp_img=" << + gemm_info.export_to_cl_image << "})"; return os; } @@ -474,7 +475,8 @@ inline std::string to_string(const GEMMKernelInfo &gemm_info) inline ::std::ostream &operator<<(::std::ostream &os, const BoundingBoxTransformInfo &bbox_info) { auto weights = bbox_info.weights(); - os << "(" << bbox_info.img_width() << "x" << bbox_info.img_height() << ")~" << bbox_info.scale() << "(weights={" << weights[0] << ", " << weights[1] << ", " << weights[2] << ", " << weights[3] << "})"; + os << "(" << bbox_info.img_width() << "x" << bbox_info.img_height() << ")~" << bbox_info.scale() << "(weights={" << weights[0] << ", " << weights[1] << ", " << weights[2] << ", " << weights[3] << + "})"; return os; } @@ -3333,46 +3335,46 @@ inline std::string to_string(const Conv3dInfo &conv3d_info) inline std::string to_string(const WeightFormat wf) { #define __CASE_WEIGHT_FORMAT(wf) \ - case WeightFormat::wf: \ - return #wf; +case WeightFormat::wf: \ + return #wf; switch(wf) { - __CASE_WEIGHT_FORMAT(UNSPECIFIED) - __CASE_WEIGHT_FORMAT(ANY) - __CASE_WEIGHT_FORMAT(OHWI) - __CASE_WEIGHT_FORMAT(OHWIo2) - __CASE_WEIGHT_FORMAT(OHWIo4) - __CASE_WEIGHT_FORMAT(OHWIo8) - __CASE_WEIGHT_FORMAT(OHWIo16) - __CASE_WEIGHT_FORMAT(OHWIo32) - __CASE_WEIGHT_FORMAT(OHWIo64) - __CASE_WEIGHT_FORMAT(OHWIo128) - __CASE_WEIGHT_FORMAT(OHWIo4i2) - __CASE_WEIGHT_FORMAT(OHWIo4i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo8i2) - __CASE_WEIGHT_FORMAT(OHWIo8i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo16i2) - __CASE_WEIGHT_FORMAT(OHWIo16i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo32i2) - __CASE_WEIGHT_FORMAT(OHWIo32i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo64i2) - __CASE_WEIGHT_FORMAT(OHWIo64i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo4i4) - __CASE_WEIGHT_FORMAT(OHWIo4i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo8i4) - __CASE_WEIGHT_FORMAT(OHWIo8i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo16i4) - __CASE_WEIGHT_FORMAT(OHWIo16i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo32i4) - __CASE_WEIGHT_FORMAT(OHWIo32i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo64i4) - __CASE_WEIGHT_FORMAT(OHWIo64i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo2i8) - __CASE_WEIGHT_FORMAT(OHWIo4i8) - __CASE_WEIGHT_FORMAT(OHWIo8i8) - __CASE_WEIGHT_FORMAT(OHWIo16i8) - __CASE_WEIGHT_FORMAT(OHWIo32i8) - __CASE_WEIGHT_FORMAT(OHWIo64i8) + __CASE_WEIGHT_FORMAT(UNSPECIFIED) + __CASE_WEIGHT_FORMAT(ANY) + __CASE_WEIGHT_FORMAT(OHWI) + __CASE_WEIGHT_FORMAT(OHWIo2) + __CASE_WEIGHT_FORMAT(OHWIo4) + __CASE_WEIGHT_FORMAT(OHWIo8) + __CASE_WEIGHT_FORMAT(OHWIo16) + __CASE_WEIGHT_FORMAT(OHWIo32) + __CASE_WEIGHT_FORMAT(OHWIo64) + __CASE_WEIGHT_FORMAT(OHWIo128) + __CASE_WEIGHT_FORMAT(OHWIo4i2) + __CASE_WEIGHT_FORMAT(OHWIo4i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo8i2) + __CASE_WEIGHT_FORMAT(OHWIo8i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo16i2) + __CASE_WEIGHT_FORMAT(OHWIo16i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo32i2) + __CASE_WEIGHT_FORMAT(OHWIo32i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo64i2) + __CASE_WEIGHT_FORMAT(OHWIo64i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo4i4) + __CASE_WEIGHT_FORMAT(OHWIo4i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo8i4) + __CASE_WEIGHT_FORMAT(OHWIo8i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo16i4) + __CASE_WEIGHT_FORMAT(OHWIo16i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo32i4) + __CASE_WEIGHT_FORMAT(OHWIo32i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo64i4) + __CASE_WEIGHT_FORMAT(OHWIo64i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo2i8) + __CASE_WEIGHT_FORMAT(OHWIo4i8) + __CASE_WEIGHT_FORMAT(OHWIo8i8) + __CASE_WEIGHT_FORMAT(OHWIo16i8) + __CASE_WEIGHT_FORMAT(OHWIo32i8) + __CASE_WEIGHT_FORMAT(OHWIo64i8) default: return "invalid value"; } @@ -3677,6 +3679,40 @@ inline std::string to_string(const experimental::dynamic_fusion::SoftmaxAttribut return str.str(); } +/** Formatted output of the arm_compute::MatMulKernelInfo type. + * + * @param[out] os Output stream. + * @param[in] matmul_info arm_compute::MatMulKernelInfo type to output. + * + * @return Modified output stream. + */ +inline ::std::ostream &operator<<(::std::ostream &os, const arm_compute::MatMulKernelInfo &matmul_info) +{ + os << "MatMulKernelInfo=" + << "[" + << "adj_lhs=" << matmul_info.adj_lhs << ", " + << "adj_rhs=" << matmul_info.adj_rhs << ", " + << "M0=" << matmul_info.m0 << ", " + << "N0=" << matmul_info.n0 << ", " + << "K0=" << matmul_info.k0 << ", " + << "export_rhs_to_cl_image=" << matmul_info.export_rhs_to_cl_image + << "]"; + + return os; +} +/** Formatted output of the arm_compute::MatMulKernelInfo type. + * + * @param[in] matmul_info arm_compute::MatMulKernelInfo type to output. + * + * @return Formatted string. + */ +inline std::string to_string(const arm_compute::MatMulKernelInfo &matmul_info) +{ + std::stringstream str; + str << matmul_info; + return str.str(); +} + } // namespace arm_compute #endif /* __ARM_COMPUTE_TYPE_PRINTER_H__ */ -- cgit v1.2.1