diff options
author | Frank Lei <frank.lei@arm.com> | 2018-02-01 14:47:14 +0800 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:40 +0000 |
commit | 4406fd6cc4abded564d3791324e1f48bdfd34273 (patch) | |
tree | 22fe402fe9ac7ca338df49e9eccd6eb1587ae875 /src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp | |
parent | 898d399a0f62c15612a52df4bff5018e783214e4 (diff) | |
download | ComputeLibrary-4406fd6cc4abded564d3791324e1f48bdfd34273.tar.gz |
APPBROWSER-391: Fix GLES COMPUTE alignment issues
APPBROWSER-402: Performance optimization for squeezenet/xray model
Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp | 9 |
1 file changed, 5 insertions, 4 deletions
```diff
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
index 769733ca66..a2607d4c2d 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
@@ -33,12 +33,13 @@
 #include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
+
 GCDirectConvolutionLayer::GCDirectConvolutionLayer()
     : _kernel(nullptr), _border_handler(), _shift_handler()
 {
 }
 
-void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
+void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
 {
     int kernel_size = weights->info()->dimension(0);
 
@@ -68,14 +69,14 @@ void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor
 
     _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
 
-    _shift_handler.configure(output);
+    _shift_handler.configure(input);
 }
 
 void GCDirectConvolutionLayer::run()
 {
+    GCScheduler::get().dispatch(_shift_handler, false);
+    GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(_border_handler, false);
     GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(*_kernel);
-    GCScheduler::get().memory_barrier();
-    GCScheduler::get().dispatch(_shift_handler);
 }
```