APPBROWSER-391: Fix GLES COMPUTE alignment issues

APPBROWSER-402: Performance optimization for squeezenet/xray model

Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
author: Frank Lei <frank.lei@arm.com> 2018-02-01 14:47:14 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:40 +0000
commit: 4406fd6cc4abded564d3791324e1f48bdfd34273 (patch)
tree: 22fe402fe9ac7ca338df49e9eccd6eb1587ae875 /src/runtime/GLES_COMPUTE
parent: 898d399a0f62c15612a52df4bff5018e783214e4 (diff)
download: ComputeLibrary-4406fd6cc4abded564d3791324e1f48bdfd34273.tar.gz
3 files changed, 42 insertions, 7 deletions
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp
index ef65989f40..9cba37110b 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,11 @@
 
 using namespace arm_compute;
 
+GCDepthwiseConvolutionLayer3x3::GCDepthwiseConvolutionLayer3x3()
+    : _kernel(nullptr), _border_handler(), _shift_handler()
+{
+}
+
 void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
 {
     auto k = arm_compute::support::cpp14::make_unique<GCDepthwiseConvolutionLayer3x3Kernel>();
@@ -38,4 +43,15 @@ void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor
 
     // Configure border handler
     _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+    _shift_handler.configure(input);
+}
+
+void GCDepthwiseConvolutionLayer3x3::run()
+{
+    GCScheduler::get().dispatch(_shift_handler, false);
+    GCScheduler::get().memory_barrier();
+    GCScheduler::get().dispatch(_border_handler, false);
+    GCScheduler::get().memory_barrier();
+    GCScheduler::get().dispatch(*_kernel);
 }
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
index 769733ca66..a2607d4c2d 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
@@ -33,12 +33,13 @@
 #include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
+
 GCDirectConvolutionLayer::GCDirectConvolutionLayer()
     : _kernel(nullptr), _border_handler(), _shift_handler()
 {
 }
 
-void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
+void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
 {
     int kernel_size = weights->info()->dimension(0);
 
@@ -68,14 +69,14 @@ void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor
 
     _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
 
-    _shift_handler.configure(output);
+    _shift_handler.configure(input);
 }
 
 void GCDirectConvolutionLayer::run()
 {
+    GCScheduler::get().dispatch(_shift_handler, false);
+    GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(_border_handler, false);
     GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(*_kernel);
-    GCScheduler::get().memory_barrier();
-    GCScheduler::get().dispatch(_shift_handler);
 }
diff --git a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
index ff03effd3f..dcbb39d87d 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,10 +25,17 @@
 
 #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+
 #include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
 
+GCPoolingLayer::GCPoolingLayer()
+    : _kernel(nullptr), _border_handler(), _shift_handler()
+{
+}
+
 void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info)
 {
     // Configure pooling kernel
@@ -39,9 +46,20 @@ void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const Poolin
     // Configure border depending on operation required
     BorderMode border_mode = (PoolingType::MAX == pool_info.pool_type()) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
     _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0.0f));
+
+    _shift_handler.configure(input);
 }
 
 Status GCPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
 {
     return GCPoolingLayerKernel::validate(input, output, pool_info);
-}
-\ No newline at end of file
+}
+
+void GCPoolingLayer::run()
+{
+    GCScheduler::get().dispatch(_shift_handler, false);
+    GCScheduler::get().memory_barrier();
+    GCScheduler::get().dispatch(_border_handler, false);
+    GCScheduler::get().memory_barrier();
+    GCScheduler::get().dispatch(*_kernel);
+}
author	Frank Lei <frank.lei@arm.com>	2018-02-01 14:47:14 +0800
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:47:40 +0000
commit	4406fd6cc4abded564d3791324e1f48bdfd34273 (patch)
tree	22fe402fe9ac7ca338df49e9eccd6eb1587ae875 /src/runtime/GLES_COMPUTE
parent	898d399a0f62c15612a52df4bff5018e783214e4 (diff)
download	ComputeLibrary-4406fd6cc4abded564d3791324e1f48bdfd34273.tar.gz