From 2b6ebfe4270b06b09e45f306e8384950aeca7e4e Mon Sep 17 00:00:00 2001 From: Ramy Elgammal Date: Thu, 9 Mar 2023 21:15:37 +0000 Subject: Implement OpenCL MatMul for Lhs NT Rhs T/NT FP32/16 - Implement ClNativeMatMulKernel class - Implement OpenCL kernel for LHS non-transposed and RHS non-transposed - Implement OpenCL kernel for LHS non-transposed and RHS transposed - Add test fixture and dataset for matmul - Implement transpose_tensor() for reference implementation to transpose high-dimensional tensors Resolves: COMPMID-5944, COMPMID-5951 Co-authored-by: Gunes Bayir Co-authored-by: Ramy Elgammal Change-Id: I1d5b8978f41be27baddb3153ade880472141573f Signed-off-by: Gunes Bayir Signed-off-by: Ramy Elgammal Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9333 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Benchmark: Arm Jenkins --- filelist.json | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'filelist.json') diff --git a/filelist.json b/filelist.json index 1b0d07bc42..f858c6a29f 100644 --- a/filelist.json +++ b/filelist.json @@ -509,6 +509,13 @@ ] } }, + "MatMul": { + "files": { + "common": [ + "src/gpu/cl/kernels/ClNativeMatMulKernel.cpp" + ] + } + }, "GenerateProposals": { "deps": [ "BoundingBoxTransform", "Dequantize", "Pad", "Permute", "Quantize", "Reshape" ], "files": { -- cgit v1.2.1