From 07c37f9954555ae3523c85f16e46cf94e9a9e290 Mon Sep 17 00:00:00 2001
From: Joel Liang
Date: Fri, 17 Nov 2017 11:34:19 +0800
Subject: APPBROWSER-313: Performance improvement for softmax layer

Process 8 elements at one time for better performance

Change-Id: I90d31e5d0834c5096fdb82f174482ade762b63d2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111840
Reviewed-by: Stephen Li
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
Reviewed-by: Anthony Barbier
---
 src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp')

diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
index d7d47d2802..1db927c8ff 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
@@ -61,6 +61,8 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output)
 void GCSoftmaxLayer::run()
 {
     GCScheduler::get().enqueue(_max_kernel, false);
+    GCScheduler::get().sync();
     GCScheduler::get().enqueue(_shift_exp_sum_kernel, false);
+    GCScheduler::get().sync();
     GCScheduler::get().enqueue(_norm_kernel);
 }
-- 
cgit v1.2.1