From 2b6ebfe4270b06b09e45f306e8384950aeca7e4e Mon Sep 17 00:00:00 2001 From: Ramy Elgammal Date: Thu, 9 Mar 2023 21:15:37 +0000 Subject: Implement OpenCL MatMul for Lhs NT Rhs T/NT FP32/16 - Implement ClNativeMatMulKernel class - Implement opencl kernel for LHS non-transposed and RHS non-transposed - Implement opencl kernel for LHS non-transposed and RHS transposed - Add test fixture and dataset for matmul - Implement transpose_tensor() for reference implementation to transpose high dimensional tensors Resolves: COMPMID-5944, COMPMID-5951 Co-authored-by: Gunes Bayir Co-authored-by: Ramy Elgammal Change-Id: I1d5b8978f41be27baddb3153ade880472141573f Signed-off-by: Gunes Bayir Signed-off-by: Ramy Elgammal Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9333 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Benchmark: Arm Jenkins --- SConscript | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'SConscript') diff --git a/SConscript b/SConscript index a480c45d62..205764b9a7 100644 --- a/SConscript +++ b/SConscript @@ -359,6 +359,7 @@ if env['opencl'] and env['embed_kernels']: 'src/core/CL/cl_kernels/common/cast.cl', 'src/core/CL/cl_kernels/common/comparisons.cl', 'src/core/CL/cl_kernels/common/concatenate.cl', + 'src/core/CL/cl_kernels/common/convolution_layer.cl', 'src/core/CL/cl_kernels/common/col2im.cl', 'src/core/CL/cl_kernels/common/convert_fc_weights.cl', 'src/core/CL/cl_kernels/common/copy_tensor.cl', @@ -368,6 +369,9 @@ if env['opencl'] and env['embed_kernels']: 'src/core/CL/cl_kernels/common/elementwise_operation.cl', 'src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl', 'src/core/CL/cl_kernels/common/elementwise_unary.cl', + 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl', + 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl', + 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl', 'src/core/CL/cl_kernels/common/fft_digit_reverse.cl', 'src/core/CL/cl_kernels/common/fft.cl', 'src/core/CL/cl_kernels/common/fft_scale.cl', @@ -377,21 +381,18 @@ if env['opencl'] and env['embed_kernels']: 'src/core/CL/cl_kernels/common/gemm.cl', 'src/core/CL/cl_kernels/common/gemm_reshaped_only_rhs_mmul.cl', 'src/core/CL/cl_kernels/common/gemm_utils.cl', - 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl', - 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl', - 'src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl', - 'src/core/CL/cl_kernels/common/gemv.cl', 'src/core/CL/cl_kernels/common/gemmlowp.cl', 'src/core/CL/cl_kernels/common/gemmlowp_reshaped_only_rhs_mmul.cl', + 'src/core/CL/cl_kernels/common/gemv.cl', 'src/core/CL/cl_kernels/common/generate_proposals.cl', 'src/core/CL/cl_kernels/common/generate_proposals_quantized.cl', 'src/core/CL/cl_kernels/common/instance_normalization.cl', 'src/core/CL/cl_kernels/common/l2_normalize.cl', + 'src/core/CL/cl_kernels/common/mat_mul.cl', 'src/core/CL/cl_kernels/common/mean_stddev_normalization.cl', - 'src/core/CL/cl_kernels/common/unpooling_layer.cl', 'src/core/CL/cl_kernels/common/memset.cl', - 'src/core/CL/cl_kernels/common/nonmax.cl', 'src/core/CL/cl_kernels/common/minmax_layer.cl', + 'src/core/CL/cl_kernels/common/nonmax.cl', 'src/core/CL/cl_kernels/common/pad_layer.cl', 'src/core/CL/cl_kernels/common/permute.cl', 'src/core/CL/cl_kernels/common/pixelwise_mul_float.cl', @@ -401,18 +402,18 @@ if env['opencl'] and env['embed_kernels']: 'src/core/CL/cl_kernels/common/range.cl', 'src/core/CL/cl_kernels/common/reduction_operation.cl', 'src/core/CL/cl_kernels/common/reshape_layer.cl', - 'src/core/CL/cl_kernels/common/convolution_layer.cl', 'src/core/CL/cl_kernels/common/reverse.cl', 'src/core/CL/cl_kernels/common/roi_align_layer.cl', 'src/core/CL/cl_kernels/common/roi_align_layer_quantized.cl', 'src/core/CL/cl_kernels/common/roi_pooling_layer.cl', 'src/core/CL/cl_kernels/common/select.cl', + 'src/core/CL/cl_kernels/common/slice_ops.cl', 'src/core/CL/cl_kernels/common/softmax_layer.cl', 'src/core/CL/cl_kernels/common/softmax_layer_quantized.cl', 'src/core/CL/cl_kernels/common/stack_layer.cl', - 'src/core/CL/cl_kernels/common/slice_ops.cl', 'src/core/CL/cl_kernels/common/tile.cl', - 'src/core/CL/cl_kernels/common/transpose.cl' + 'src/core/CL/cl_kernels/common/transpose.cl', + 'src/core/CL/cl_kernels/common/unpooling_layer.cl' ] # NCHW kernels -- cgit v1.2.1