diff options
author | steli01 <stephen.li@arm.com> | 2017-12-06 18:53:32 +0800 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:42:17 +0000 |
commit | 7d473dd2f84ca9a1e7a29d2bab1cf0c556970c4d (patch) | |
tree | 8aaf86ed69037992c0fef62139f7a0b8f1f1a3bd /src/core/GLES_COMPUTE/kernels | |
parent | b42d53c4c08d3aafaeb5b0b98f19e8a708710acf (diff) | |
download | ComputeLibrary-7d473dd2f84ca9a1e7a29d2bab1cf0c556970c4d.tar.gz |
APPBROWSER-323: Transpose performance optimization
Change-Id: Ib678dee9de43690e4cfb7be1e7ccf7a7ab38233d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112085
Reviewed-by: Joel Liang <joel.liang@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels')
-rw-r--r-- | src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
index acb998840b..621c9693fe 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
@@ -75,8 +75,16 @@ void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output)
     build_opts.emplace(("#define TRANSPOSE_4X4"));
     num_elems_processed_per_iteration = 4;
 #elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */
-    build_opts.emplace(("#define TRANSPOSE_8X8"));
-    num_elems_processed_per_iteration = 8;
+    if(w_out != h_out)
+    {
+        build_opts.emplace("#define TRANSPOSE_8X8");
+        num_elems_processed_per_iteration = 8;
+    }
+    else
+    {
+        build_opts.emplace("#define TRANSPOSE_8X8_SQUARE");
+        num_elems_processed_per_iteration = 8;
+    }
 #endif /* TRANSPOSE_4X4 */
 }