From 9e9cbafa9e6cc6b543c89a96d52fc9c5fde04ceb Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 15 Mar 2018 14:41:34 +0000 Subject: COMPMID-1004 GLES: Add memory manager to GLES functions Change-Id: I80fc9c0dd02afd79b501abde751036f9599b7bf2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125103 Tested-by: Jenkins Reviewed-by: Georgios Pinitas --- src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp | 50 ++++++++++++++++++++++ src/runtime/GLES_COMPUTE/GCTensor.cpp | 4 +- src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp | 41 ++++++++++++++---- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 13 ++++-- .../functions/GCFullyConnectedLayer.cpp | 9 ++-- src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp | 7 ++- .../functions/GCNormalizationLayer.cpp | 9 ++-- .../GLES_COMPUTE/functions/GCSoftmaxLayer.cpp | 15 +++++-- 8 files changed, 123 insertions(+), 25 deletions(-) create mode 100644 src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp (limited to 'src/runtime/GLES_COMPUTE') diff --git a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp new file mode 100644 index 0000000000..50e3cc7c1c --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +#include <cstddef> + +using namespace arm_compute; + +void *GCBufferAllocator::allocate(size_t size, size_t alignment) +{ + ARM_COMPUTE_UNUSED(alignment); + auto *gl_buffer = new GLBufferWrapper(); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(size), nullptr, GL_STATIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + + return reinterpret_cast<void *>(gl_buffer); +} + +void GCBufferAllocator::free(void *ptr) +{ + ARM_COMPUTE_ERROR_ON(ptr == nullptr); + auto *gl_buffer = reinterpret_cast<GLBufferWrapper *>(ptr); + delete gl_buffer; +} diff --git a/src/runtime/GLES_COMPUTE/GCTensor.cpp b/src/runtime/GLES_COMPUTE/GCTensor.cpp index edbd16dc1d..e193d26f0a 100644 --- a/src/runtime/GLES_COMPUTE/GCTensor.cpp +++ b/src/runtime/GLES_COMPUTE/GCTensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,7 +27,7 @@ using namespace arm_compute; GCTensor::GCTensor() - : _allocator() + : _allocator(this) { } diff --git a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp index 694b34f1ec..abd2b483d3 100644 --- a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp +++ b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -31,11 +31,16 @@ using namespace arm_compute; -GCTensorAllocator::GCTensorAllocator() - : _gl_buffer(), _mapping(nullptr) +GCTensorAllocator::GCTensorAllocator(GCTensor *owner) + : _associated_memory_group(nullptr), _gl_buffer(), _mapping(nullptr), _owner(owner) { } +GCTensorAllocator::~GCTensorAllocator() +{ + _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>(); +} + uint8_t *GCTensorAllocator::data() { return _mapping; @@ -43,17 +48,35 @@ uint8_t *GCTensorAllocator::data() void GCTensorAllocator::allocate() { - _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>(); - ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); - ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(info().total_size()), nullptr, GL_STATIC_DRAW)); - ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + if(_associated_memory_group == nullptr) + { + _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>(); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(info().total_size()), nullptr, GL_STATIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + } + else + { + _associated_memory_group->finalize_memory(_owner, reinterpret_cast<void **>(&_gl_buffer), info().total_size()); + } info().set_is_resizable(false); } void GCTensorAllocator::free() { - _gl_buffer.reset(); - info().set_is_resizable(true); + 
if(_associated_memory_group == nullptr) + { + _gl_buffer.reset(); + info().set_is_resizable(true); + } +} + +void GCTensorAllocator::set_associated_memory_group(GCMemoryGroup *associated_memory_group) +{ + ARM_COMPUTE_ERROR_ON(associated_memory_group == nullptr); + ARM_COMPUTE_ERROR_ON(_associated_memory_group != nullptr); + ARM_COMPUTE_ERROR_ON(_gl_buffer.get() != nullptr); + _associated_memory_group = associated_memory_group; } uint8_t *GCTensorAllocator::lock() diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index 5689722340..f4c073668a 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -91,9 +91,9 @@ void GCConvolutionLayerReshapeWeights::run() } } -GCConvolutionLayer::GCConvolutionLayer() - : _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(), _input_interleaved_reshaped(), - _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) +GCConvolutionLayer::GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(), + _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) { } @@ -196,6 +196,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig // FIXME: input->clone() doesn't work with subtensors for grouped convolutions. 
TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->info()->fixed_point_position()); _input_im2col_reshaped.allocator()->init(im2col_reshaped_info); + _memory_group.manage(&_input_im2col_reshaped); // Create tensor (interleave) to prepare input tensor for GEMM if(run_interleaved) @@ -207,6 +208,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig // FIXME: input->clone() doesn't work with subtensors for grouped convolutions. TensorInfo interleaved_info(shape_interleaved, 1, dt, input->info()->fixed_point_position()); _input_interleaved_reshaped.allocator()->init(interleaved_info); + _memory_group.manage(&_input_interleaved_reshaped); } // Create GEMM output tensor @@ -218,6 +220,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig // FIXME: input->clone() doesn't work with subtensors for grouped convolutions. TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position()); _gemm_output.allocator()->init(info_gemm); + _memory_group.manage(&_gemm_output); // Configure kernels if(dt == DataType::F16) @@ -263,6 +266,8 @@ void GCConvolutionLayer::run() _reshape_weights.run(); } + _memory_group.acquire(); + // Run im2col GCScheduler::get().dispatch(_fill_border); GCScheduler::get().memory_barrier(); @@ -282,4 +287,6 @@ void GCConvolutionLayer::run() GCScheduler::get().memory_barrier(); // Reshape output matrix GCScheduler::get().dispatch(_output_col2im_kernel, false); + + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp index 9e4f0f6c95..0f8f8e6c94 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -38,9 +38,9 @@ void GCFullyConnectedLayerReshapeWeights::configure(const IGCTensor *input, IGCT _kernel = std::move(k); } -GCFullyConnectedLayer::GCFullyConnectedLayer() - 
: _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), - _accumulate_biases(false) +GCFullyConnectedLayer::GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), + _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false) { } @@ -61,6 +61,7 @@ void GCFullyConnectedLayer::configure_conv_fc(const IGCTensor *input, const IGCT _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt)); // Configure im2col kernel + _memory_group.manage(&_im2col_output); _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure matrix multiply kernel @@ -156,6 +157,7 @@ void GCFullyConnectedLayer::run() _reshape_weights_kernel.run(); } + _memory_group.acquire(); // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { @@ -177,4 +179,5 @@ void GCFullyConnectedLayer::run() GCScheduler::get().dispatch(_accumulate_biases_kernel); } + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp index 5122c20504..46424a59f5 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -40,8 +40,8 @@ using namespace arm_compute; using namespace arm_compute::gles_compute; -GCGEMM::GCGEMM() - : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false) +GCGEMM::GCGEMM(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), 
_run_addition(false) { } @@ -88,6 +88,7 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor * TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position()); _tmp_a.allocator()->init(info_a); + _memory_group.manage(&_tmp_a); TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position()); _tmp_b.allocator()->init(info_b); @@ -118,6 +119,7 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor * void GCGEMM::run() { + _memory_group.acquire(); if(_is_interleaved_transposed) { // Run interleave kernel @@ -137,4 +139,5 @@ void GCGEMM::run() GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_ma_kernel); } + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp index fc3882dbda..13213d2b54 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -33,8 +33,8 @@ using namespace arm_compute; -GCNormalizationLayer::GCNormalizationLayer() - : _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler() +GCNormalizationLayer::GCNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler() { } @@ -43,6 +43,7 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, ARM_COMPUTE_ERROR_ON(input == nullptr); _squared_input.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, input->info()->data_type())); + _memory_group.manage(&_squared_input); _norm_kernel.configure(input, &_squared_input, output, norm_info); _multiply_kernel.configure(input, input, &_squared_input, 1.0f); @@ -55,9 +56,11 @@ void GCNormalizationLayer::run() void GCNormalizationLayer::run() { + _memory_group.acquire(); GCScheduler::get().dispatch(_multiply_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_border_handler, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_norm_kernel, true); + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp index 5221c5cc5d..1748a5952b 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,8 +29,8 @@ using namespace arm_compute; -GCSoftmaxLayer::GCSoftmaxLayer() - : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() +GCSoftmaxLayer::GCSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() { } @@ -50,6 +50,11 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float _max.allocator()->init(tensor_info_max_sum); _sum.allocator()->init(tensor_info_max_sum); + // Manage intermediate buffers + _memory_group.manage(&_tmp); + _memory_group.manage(&_max); + _memory_group.manage(&_sum); + // Configure Kernels _max_kernel.configure(input, &_max); _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); @@ -63,9 +68,13 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float void GCSoftmaxLayer::run() { + _memory_group.acquire(); + GCScheduler::get().dispatch(_max_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_shift_exp_sum_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_norm_kernel); + + _memory_group.release(); } -- cgit v1.2.1