diff options
author | Gian Marco <gianmarco.iodice@arm.com> | 2018-01-30 13:35:54 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:18 +0000 |
commit | 19835e591cb0b66a0f5000ae1505bf299e50337d (patch) | |
tree | 525ee8b233a2cefe3b2734d76fdb91093b8c2d50 /src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | |
parent | 6fa009e05ae32e64f397f54087885c3eb68f0b4b (diff) | |
download | ComputeLibrary-19835e591cb0b66a0f5000ae1505bf299e50337d.tar.gz |
COMPMID-882 - Optimizing GEMMLowp on OpenCL reshaping matrices
This new optimization achieves 36.3% MAC utilisation on Mate 9 @ 1GHz.
The performance results have been reported here:
https://confluence.arm.com/display/MLENG/GEMMLowp+performance%3A+ACL+18.02
Change-Id: I71b6a217068763dfdc11bbf3574ee0eb94f93679
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118531
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp index 63aed6df32..24d218760e 100644 --- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp +++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp @@ -113,6 +113,7 @@ void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *outp // Create build options CLBuildOptions build_opts; + build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->info()->element_size())); build_opts.add_option("-DTRANSPOSE_W=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DMULT_TRANSPOSE1XW_WIDTH=" + support::cpp11::to_string(mult_transpose1xW_width)); |