From bf9731edfa0439cad4d70efc3065e71e199c62b8 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Wed, 12 Dec 2018 10:18:04 +0000 Subject: COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1 The current implementation is limited just to FP32 Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244 Reviewed-on: https://review.mlplatform.org/389 Reviewed-by: Anthony Barbier Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/CL/kernels/CLIm2ColKernel.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/core/CL/kernels/CLIm2ColKernel.cpp') diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 54ef23f2a2..e3d8df53e5 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -192,11 +192,15 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size num_elems_processed_per_iteration = 2; is_padding_required_nchw = false; - // Only the 3x3 case is optimized for NHWC + // Only the 3x3 and 9x9 cases are optimized for NHWC if(kernel_dims == Size2D(3U, 3U)) { kernel_name = "im2col3x3_"; } + else if(kernel_dims == Size2D(9U, 9U)) + { + kernel_name = "im2col9x9_"; + } build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DLAST_ACCESSED=" + support::cpp11::to_string(std::max(static_cast<int>(input_channel - num_elems_processed_per_iteration), 0))); -- cgit v1.2.1