aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
diff options
context:
space:
mode:
author: steli01 <stephen.li@arm.com> 2017-12-06 18:53:32 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:17 +0000
commit: 7d473dd2f84ca9a1e7a29d2bab1cf0c556970c4d (patch)
tree: 8aaf86ed69037992c0fef62139f7a0b8f1f1a3bd /src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
parent: b42d53c4c08d3aafaeb5b0b98f19e8a708710acf (diff)
download: ComputeLibrary-7d473dd2f84ca9a1e7a29d2bab1cf0c556970c4d.tar.gz
APPBROWSER-323: Transpose performance optimization
Change-Id: Ib678dee9de43690e4cfb7be1e7ccf7a7ab38233d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112085
Reviewed-by: Joel Liang <joel.liang@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp')
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp | 12
1 files changed, 10 insertions, 2 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
index acb998840b..621c9693fe 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
@@ -75,8 +75,16 @@ void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output)
build_opts.emplace(("#define TRANSPOSE_4X4"));
num_elems_processed_per_iteration = 4;
#elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */
- build_opts.emplace(("#define TRANSPOSE_8X8"));
- num_elems_processed_per_iteration = 8;
+ if(w_out != h_out)
+ {
+ build_opts.emplace("#define TRANSPOSE_8X8");
+ num_elems_processed_per_iteration = 8;
+ }
+ else
+ {
+ build_opts.emplace("#define TRANSPOSE_8X8_SQUARE");
+ num_elems_processed_per_iteration = 8;
+ }
#endif /* TRANSPOSE_4X4 */
}