From 4406fd6cc4abded564d3791324e1f48bdfd34273 Mon Sep 17 00:00:00 2001 From: Frank Lei Date: Thu, 1 Feb 2018 14:47:14 +0800 Subject: APPBROWSER-391: Fix GLES COMPUTE alignment issues APPBROWSER-402: Performance optimization for squeezenet/xray model Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526 Tested-by: Jenkins Reviewed-by: Georgios Pinitas --- .../functions/GCDepthwiseConvolutionLayer.cpp | 18 +++++++++++++++++- .../functions/GCDirectConvolutionLayer.cpp | 9 +++++---- .../GLES_COMPUTE/functions/GCPoolingLayer.cpp | 22 ++++++++++++++++++++-- 3 files changed, 42 insertions(+), 7 deletions(-) (limited to 'src/runtime/GLES_COMPUTE') diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp index ef65989f40..9cba37110b 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,11 @@ using namespace arm_compute; +GCDepthwiseConvolutionLayer3x3::GCDepthwiseConvolutionLayer3x3() + : _kernel(nullptr), _border_handler(), _shift_handler() +{ +} + void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) { auto k = arm_compute::support::cpp14::make_unique(); @@ -38,4 +43,15 @@ void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor // Configure border handler _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0)); + + _shift_handler.configure(input); +} + +void GCDepthwiseConvolutionLayer3x3::run() +{ + GCScheduler::get().dispatch(_shift_handler, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_border_handler, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(*_kernel); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp index 769733ca66..a2607d4c2d 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp @@ -33,12 +33,13 @@ #include "support/ToolchainSupport.h" using namespace arm_compute; + GCDirectConvolutionLayer::GCDirectConvolutionLayer() : _kernel(nullptr), _border_handler(), _shift_handler() { } -void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) +void GCDirectConvolutionLayer::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) { int kernel_size = weights->info()->dimension(0); @@ -68,14 +69,14 @@ void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0)); - _shift_handler.configure(output); + _shift_handler.configure(input); } void GCDirectConvolutionLayer::run() { + GCScheduler::get().dispatch(_shift_handler, false); + GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_border_handler, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(*_kernel); - GCScheduler::get().memory_barrier(); - GCScheduler::get().dispatch(_shift_handler); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp index ff03effd3f..dcbb39d87d 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,10 +25,17 @@ #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + #include "support/ToolchainSupport.h" using namespace arm_compute; +GCPoolingLayer::GCPoolingLayer() + : _kernel(nullptr), _border_handler(), _shift_handler() +{ +} + void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info) { // Configure pooling kernel @@ -39,9 +46,20 @@ void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const Poolin // Configure border depending on operation required BorderMode border_mode = (PoolingType::MAX == pool_info.pool_type()) ? BorderMode::REPLICATE : BorderMode::CONSTANT; _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0.0f)); + + _shift_handler.configure(input); } Status GCPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info) { return GCPoolingLayerKernel::validate(input, output, pool_info); -} \ No newline at end of file +} + +void GCPoolingLayer::run() +{ + GCScheduler::get().dispatch(_shift_handler, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_border_handler, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(*_kernel); +} -- cgit v1.2.1