From eb8f71eecbc44e64cd7814f53b27b42c43dd660b Mon Sep 17 00:00:00 2001 From: Joel Liang Date: Wed, 27 Dec 2017 13:16:00 +0800 Subject: APPBROWSER-230, APPBROWSER-300: Rename GCScheduler enqueue to dispatch, sync to memory_barrier Also fix the synchronisation issues between different kernels. Change-Id: Ib59d83ae8d5cc8b0bdf13e6f4958edccdab91ca4 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114594 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/runtime/GLES_COMPUTE/GCScheduler.cpp | 4 ++-- src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp | 7 +++---- .../GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp | 2 +- .../GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp | 5 +++-- src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp | 2 +- .../GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp | 13 ++++++++----- src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp | 10 ++++++---- src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp | 8 +++++--- .../GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp | 2 +- src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp | 10 +++++----- 10 files changed, 35 insertions(+), 28 deletions(-) (limited to 'src/runtime/GLES_COMPUTE') diff --git a/src/runtime/GLES_COMPUTE/GCScheduler.cpp b/src/runtime/GLES_COMPUTE/GCScheduler.cpp index f19b43348d..fcc855957f 100644 --- a/src/runtime/GLES_COMPUTE/GCScheduler.cpp +++ b/src/runtime/GLES_COMPUTE/GCScheduler.cpp @@ -63,7 +63,7 @@ GCScheduler &GCScheduler::get() return scheduler; } -void GCScheduler::enqueue(IGCKernel &kernel, bool flush) +void GCScheduler::dispatch(IGCKernel &kernel, bool flush) { kernel.run(kernel.window()); if(flush) @@ -72,7 +72,7 @@ void GCScheduler::enqueue(IGCKernel &kernel, bool flush) } } -void GCScheduler::sync() +void GCScheduler::memory_barrier() { ARM_COMPUTE_GL_CHECK(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT)); } diff --git a/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp index 19f178f445..f2926b0a3f 100644 --- a/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp +++ b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp @@ -38,8 +38,7 @@ void IGCSimpleFunction::run() { ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the GLES kernel or function isn't configured"); - // FIXME(APPBROWSER-300): We may need to rename "enqueue" to "dispatch" and "sync" to "memory_barrier". - GCScheduler::get().enqueue(_border_handler, false); - GCScheduler::get().sync(); - GCScheduler::get().enqueue(*_kernel); + GCScheduler::get().dispatch(_border_handler, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(*_kernel); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp index 2e546a663a..99bdf43c05 100755 --- a/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp @@ -44,5 +44,5 @@ void GCBatchNormalizationLayer::configure(const IGCTensor *input, IGCTensor *out void GCBatchNormalizationLayer::run() { - GCScheduler::get().enqueue(_norm_kernel, true); + GCScheduler::get().dispatch(_norm_kernel, true); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp index ee0b121695..689d8bee81 100755 --- a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp @@ -63,7 +63,8 @@ void GCDepthConcatenateLayer::run() { for(unsigned i = 0; i < _num_inputs; i++) { - GCScheduler::get().enqueue(_border_handlers_vector[i], false); - GCScheduler::get().enqueue(_concat_kernels_vector[i], true); + GCScheduler::get().dispatch(_border_handlers_vector[i], false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_concat_kernels_vector[i], true); } } diff --git a/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp index 032c2fdb1e..6407464e48 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp @@ -46,5 +46,5 @@ void GCDropoutLayer::configure(const IGCTensor *input, IGCTensor *mask, IGCTenso void GCDropoutLayer::run() { - GCScheduler::get().enqueue(_dropout_kernel); + GCScheduler::get().dispatch(_dropout_kernel); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp index 63cb40e616..041622d255 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -159,19 +159,22 @@ void GCFullyConnectedLayer::run() // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { - GCScheduler::get().enqueue(_im2col_kernel, false); + GCScheduler::get().dispatch(_im2col_kernel, false); } - GCScheduler::get().sync(); + if(!_are_weights_reshaped || _is_fc_after_conv) + { + GCScheduler::get().memory_barrier(); + } // Run matrix multiply - GCScheduler::get().enqueue(_mm_kernel, !_accumulate_biases); + GCScheduler::get().dispatch(_mm_kernel, !_accumulate_biases); // Accumulate biases if provided if(_accumulate_biases) { - GCScheduler::get().sync(); + GCScheduler::get().memory_barrier(); - GCScheduler::get().enqueue(_accumulate_biases_kernel); + GCScheduler::get().dispatch(_accumulate_biases_kernel); } } diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp index 1c34c038b8..7aa2d421eb 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -117,18 +117,20 @@ void GCGEMM::run() if(_is_interleaved_transposed) { // Run interleave kernel - GCScheduler::get().enqueue(_interleave_kernel, false); + GCScheduler::get().dispatch(_interleave_kernel, false); // Run transpose kernel - GCScheduler::get().enqueue(_transpose_kernel, false); + GCScheduler::get().dispatch(_transpose_kernel, false); + GCScheduler::get().memory_barrier(); } // Run matrix multiply kernel - GCScheduler::get().enqueue(_mm_kernel, !_run_addition); + GCScheduler::get().dispatch(_mm_kernel, !_run_addition); // Run matrix addition kernel if(_run_addition) { - GCScheduler::get().enqueue(_ma_kernel); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_ma_kernel); } } diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp index d30ed52d5c..fc3882dbda 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp @@ -55,7 +55,9 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, void GCNormalizationLayer::run() { - GCScheduler::get().enqueue(_multiply_kernel, false); - GCScheduler::get().enqueue(_border_handler, false); - GCScheduler::get().enqueue(_norm_kernel, false); + GCScheduler::get().dispatch(_multiply_kernel, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_border_handler, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_norm_kernel, true); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp index de04a28547..5fb971c154 100755 --- a/src/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.cpp @@ -44,5 +44,5 @@ void GCNormalizePlanarYUVLayer::configure(const IGCTensor *input, IGCTensor *out void GCNormalizePlanarYUVLayer::run() { - GCScheduler::get().enqueue(_norm_kernel, true); + GCScheduler::get().dispatch(_norm_kernel, true); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp index 34464ff057..5221c5cc5d 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -63,9 +63,9 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float void GCSoftmaxLayer::run() { - GCScheduler::get().enqueue(_max_kernel, false); - GCScheduler::get().sync(); - GCScheduler::get().enqueue(_shift_exp_sum_kernel, false); - GCScheduler::get().sync(); - GCScheduler::get().enqueue(_norm_kernel); + GCScheduler::get().dispatch(_max_kernel, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_shift_exp_sum_kernel, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_norm_kernel); } -- cgit v1.2.1