From ceff0f9a991b693f568c25b1e0933582301082e7 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 19 Mar 2018 19:57:01 +0000 Subject: COMPMID-1016: Optimize kernel reconfiguration Optimizes kernel reconfiguration when memory manager is used. Note that this works only if every subsequent reconfiguration leads to sizes less than the first one. Change-Id: I08898e99929c3756147a02979b726c2380b6e11d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125114 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp | 5 +++-- src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp | 10 ++++++++-- src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp | 4 ++++ src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp | 2 ++ 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'src/runtime/GLES_COMPUTE') diff --git a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp index 50e3cc7c1c..d8f6867634 100644 --- a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp +++ b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp @@ -29,8 +29,8 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ void *GCBufferAllocator::allocate(size_t size, size_t alignment) { ARM_COMPUTE_UNUSED(alignment); @@ -48,3 +48,4 @@ void GCBufferAllocator::free(void *ptr) auto *gl_buffer = reinterpret_cast(ptr); delete gl_buffer; } +} // namespace arm_compute diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp index 0f8f8e6c94..a300033bb2 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -79,7 +79,8 @@ void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTen _mm_kernel.configure(input, weights, output, 1.0f, false); } -void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, 
const IGCTensor *biases, IGCTensor *output, bool transpose_weights, bool are_weights_reshaped) +void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, + bool transpose_weights, bool are_weights_reshaped, bool retain_internal_weights) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); @@ -141,11 +142,14 @@ void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *w } // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called - if(!_are_weights_reshaped) + if(!_are_weights_reshaped && !retain_internal_weights) { // Allocate the tensor for the weights reshaped _reshape_weights_output.allocator()->allocate(); } + + ARM_COMPUTE_ERROR_ON(retain_internal_weights && _reshape_weights_output.gc_buffer() == 0); + _are_weights_reshaped = _are_weights_reshaped || retain_internal_weights; } void GCFullyConnectedLayer::run() @@ -158,6 +162,7 @@ void GCFullyConnectedLayer::run() } _memory_group.acquire(); + // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { @@ -179,5 +184,6 @@ void GCFullyConnectedLayer::run() GCScheduler::get().dispatch(_accumulate_biases_kernel); } + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp index 46424a59f5..9c8568a329 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -92,6 +92,10 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor * TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position()); _tmp_b.allocator()->init(info_b); + if(!gemm_info.reshape_b_only_on_first_run()) + { + _memory_group.manage(&_tmp_b); + } // Configure interleave kernel 
_interleave_kernel.configure(a, &_tmp_a); diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp index 13213d2b54..b2e69ee8c6 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp @@ -57,10 +57,12 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, void GCNormalizationLayer::run() { _memory_group.acquire(); + GCScheduler::get().dispatch(_multiply_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_border_handler, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_norm_kernel, true); + _memory_group.release(); } -- cgit v1.2.1