From 7d473dd2f84ca9a1e7a29d2bab1cf0c556970c4d Mon Sep 17 00:00:00 2001
From: steli01
Date: Wed, 6 Dec 2017 18:53:32 +0800
Subject: APPBROWSER-323: Transpose performance optimization

Change-Id: Ib678dee9de43690e4cfb7be1e7ccf7a7ab38233d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112085
Reviewed-by: Joel Liang
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
Reviewed-by: Anthony Barbier
---
 src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'src/core/GLES_COMPUTE/kernels')

diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
index acb998840b..621c9693fe 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
@@ -75,8 +75,16 @@ void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output)
         build_opts.emplace(("#define TRANSPOSE_4X4"));
         num_elems_processed_per_iteration = 4;
 #elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */
-        build_opts.emplace(("#define TRANSPOSE_8X8"));
-        num_elems_processed_per_iteration = 8;
+        if(w_out != h_out)
+        {
+            build_opts.emplace("#define TRANSPOSE_8X8");
+            num_elems_processed_per_iteration = 8;
+        }
+        else
+        {
+            build_opts.emplace("#define TRANSPOSE_8X8_SQUARE");
+            num_elems_processed_per_iteration = 8;
+        }
 #endif /* TRANSPOSE_4X4 */
     }
 
-- 
cgit v1.2.1