aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLIm2ColKernel.cpp
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-12-12 10:18:04 +0000
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-12-14 14:57:48 +0000
commit bf9731edfa0439cad4d70efc3065e71e199c62b8 (patch)
tree 71340a3d04a6294744c642ed6e4a56c0e8a77592 /src/core/CL/kernels/CLIm2ColKernel.cpp
parent 92e278d5f462c930af1947883a5f48c10586ae9c (diff)
download ComputeLibrary-bf9731edfa0439cad4d70efc3065e71e199c62b8.tar.gz
COMPMID-1687: Optimize CLGEMMMatrixMultiplyKernel for Mali-G76 - Part1
The current implementation is limited just to FP32.

Change-Id: I185ab57e483e879d7c301e9cc3033efc8b41e244
Reviewed-on: https://review.mlplatform.org/389
Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLIm2ColKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLIm2ColKernel.cpp 6
1 file changed, 5 insertions, 1 deletion
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 54ef23f2a2..e3d8df53e5 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -192,11 +192,15 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size
num_elems_processed_per_iteration = 2;
is_padding_required_nchw = false;
- // Only the 3x3 case is optimized for NHWC
+ // Only the 3x3 and 9x9 cases are optimized for NHWC
if(kernel_dims == Size2D(3U, 3U))
{
kernel_name = "im2col3x3_";
}
+ else if(kernel_dims == Size2D(9U, 9U))
+ {
+ kernel_name = "im2col9x9_";
+ }
build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DLAST_ACCESSED=" + support::cpp11::to_string(std::max(static_cast<int>(input_channel - num_elems_processed_per_iteration), 0)));