diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2018-03-15 14:41:34 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:49:16 +0000 |
commit | 9e9cbafa9e6cc6b543c89a96d52fc9c5fde04ceb (patch) | |
tree | eb8ae82627c447e530e2745788c371f708c887a5 | |
parent | be0ae93c50bfa3e588111585025278daa8cb0694 (diff) | |
download | ComputeLibrary-9e9cbafa9e6cc6b543c89a96d52fc9c5fde04ceb.tar.gz |
COMPMID-1004 GLES: Add memory manager to GLES functions
Change-Id: I80fc9c0dd02afd79b501abde751036f9599b7bf2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125103
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
17 files changed, 271 insertions, 53 deletions
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h new file mode 100644 index 0000000000..103ac37bdd --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCBUFFERALLOCATOR_H__ +#define __ARM_COMPUTE_GCBUFFERALLOCATOR_H__ + +#include "arm_compute/runtime/IAllocator.h" + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +#include <cstddef> + +namespace arm_compute +{ +/** Default gles buffer allocator implementation */ +class GCBufferAllocator : public IAllocator +{ +public: + /** Default constructor */ + GCBufferAllocator() = default; + + // Inherited methods overridden: + void *allocate(size_t size, size_t alignment) override; + void free(void *ptr) override; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_GCBUFFERALLOCATOR_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h b/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h new file mode 100644 index 0000000000..485aa0e024 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCMEMORYGROUP_H__ +#define __ARM_COMPUTE_GCMEMORYGROUP_H__ + +#include "arm_compute/runtime/MemoryGroupBase.h" + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/utils/misc/Cast.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +namespace arm_compute +{ +using GCMemoryGroup = MemoryGroupBase<GCTensor>; + +template <> +inline void MemoryGroupBase<GCTensor>::associate_memory_group(GCTensor *obj) +{ + ARM_COMPUTE_ERROR_ON(obj == nullptr); + ARM_COMPUTE_ERROR_ON(dynamic_cast<GCTensorAllocator *>(obj->allocator()) == nullptr); + + auto allocator = arm_compute::utils::cast::polymorphic_downcast<GCTensorAllocator *>(obj->allocator()); + ARM_COMPUTE_ERROR_ON(allocator == nullptr); + allocator->set_associated_memory_group(this); +} +} // arm_compute +#endif /*__ARM_COMPUTE_GCMEMORYGROUP_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensor.h b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h index 3e51f9908f..0f5f194266 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCTensor.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -91,7 +91,7 @@ protected: void do_unmap() override; private: - mutable GCTensorAllocator _allocator; + mutable GCTensorAllocator _allocator; /**< Instance of the OpenGL ES tensor allocator */ }; using GCImage = GCTensor; diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h index ce52cbbbdc..fc14f04ac2 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,17 +27,37 @@ #include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "arm_compute/runtime/MemoryGroupBase.h" #include <memory> namespace arm_compute { +class GCTensor; +template <typename> +class MemoryGroupBase; +using GCMemoryGroup = MemoryGroupBase<GCTensor>; + +class GLBufferWrapper +{ +public: + GLBufferWrapper() + : _ssbo_name(0) + { + ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_ssbo_name)); + } + ~GLBufferWrapper() + { + ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_ssbo_name)); + } + GLuint _ssbo_name; +}; /** Basic implementation of a GLES memory tensor allocator. */ class GCTensorAllocator : public ITensorAllocator { public: /** Default constructor. */ - GCTensorAllocator(); + GCTensorAllocator(GCTensor *owner = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers). */ GCTensorAllocator(const GCTensorAllocator &) = delete; @@ -52,7 +72,7 @@ public: GCTensorAllocator &operator=(GCTensorAllocator &&) = default; /** Default destructor */ - ~GCTensorAllocator() = default; + ~GCTensorAllocator(); /** Interface to be implemented by the child class to return the pointer to the mapped data. 
*/ uint8_t *data(); @@ -95,6 +115,12 @@ public: */ void free() override; + /** Associates the tensor with a memory group + * + * @param[in] associated_memory_group Memory group to associate the tensor with + */ + void set_associated_memory_group(GCMemoryGroup *associated_memory_group); + protected: /** Call map() on the SSBO. * @@ -106,22 +132,10 @@ protected: void unlock() override; private: - class GLBufferWrapper - { - public: - GLBufferWrapper() - : _ssbo_name(0) - { - ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_ssbo_name)); - } - ~GLBufferWrapper() - { - ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_ssbo_name)); - } - GLuint _ssbo_name; - }; - std::unique_ptr<GLBufferWrapper> _gl_buffer; - uint8_t *_mapping; + GCMemoryGroup *_associated_memory_group; /**< Registered memory group */ + std::unique_ptr<GLBufferWrapper> _gl_buffer; /**< OpenGL ES object containing the tensor data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenGL ES buffer. */ + GCTensor *_owner; /**< Owner of the allocator */ }; } diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h index e3fa98e6e7..2bac982d0c 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h @@ -33,6 +33,7 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -83,7 +84,7 @@ class GCConvolutionLayer : public IFunction { public: /** Default constructor */ - GCConvolutionLayer(); + GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. 
* @@ -115,6 +116,7 @@ private: void configure_mm(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output, bool is_interleaved_transposed = true); private: + GCMemoryGroup _memory_group; GCConvolutionLayerReshapeWeights _reshape_weights; GCIm2ColKernel _input_im2col_kernel; GCGEMMInterleave4x4Kernel _input_interleave_kernel; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h index 3ba44f59cb..81be1de21a 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h @@ -28,6 +28,7 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" @@ -63,7 +64,7 @@ class GCFullyConnectedLayer : public IFunction { public: /** Constructor */ - GCFullyConnectedLayer(); + GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: F16/F32. 
@@ -82,6 +83,7 @@ private: void configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); void configure_conv_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); + GCMemoryGroup _memory_group; GCIm2ColKernel _im2col_kernel; GCFullyConnectedLayerReshapeWeights _reshape_weights_kernel; GCGEMMMatrixMultiplyKernel _mm_kernel; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h index 8ddfae1169..31ad0abaa0 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h @@ -29,6 +29,7 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -48,7 +49,7 @@ class GCGEMM : public IFunction { public: /** Default constructor. */ - GCGEMM(); + GCGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Initialise the kernel's inputs and output * @@ -73,6 +74,7 @@ public: void run() override; private: + GCMemoryGroup _memory_group; GCGEMMInterleave4x4Kernel _interleave_kernel; GCGEMMTranspose1xWKernel _transpose_kernel; GCGEMMMatrixMultiplyKernel _mm_kernel; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h index d080a2f7b9..adc8157f0e 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/core/Types.h" @@ -48,7 +49,7 @@ class GCNormalizationLayer : public IFunction { public: /** Default constructor */ - GCNormalizationLayer(); + GCNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], @@ -62,6 +63,7 @@ public: void run() override; private: + GCMemoryGroup _memory_group; GCTensor _squared_input; /**< The intermediate buffer which stores results of squaring input*/ GCNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */ GCPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel to run */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h index e7f8d5053a..1011c9a2ef 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_GCSOFTMAXLAYER_H__ #include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -46,7 +47,7 @@ class GCSoftmaxLayer : public IFunction { public: /** Constructor */ - GCSoftmaxLayer(); + GCSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: F16/F32 @@ -59,6 +60,7 @@ public: void run() override; private: + GCMemoryGroup _memory_group; GCLogits1DMaxKernel _max_kernel; GCLogits1DShiftExpSumKernel _shift_exp_sum_kernel; GCLogits1DNormKernel _norm_kernel; diff --git a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp new file mode 100644 index 0000000000..50e3cc7c1c --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +#include <cstddef> + +using namespace arm_compute; + +void *GCBufferAllocator::allocate(size_t size, size_t alignment) +{ + ARM_COMPUTE_UNUSED(alignment); + auto *gl_buffer = new GLBufferWrapper(); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(size), nullptr, GL_STATIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + + return reinterpret_cast<void *>(gl_buffer); +} + +void GCBufferAllocator::free(void *ptr) +{ + ARM_COMPUTE_ERROR_ON(ptr == nullptr); + auto *gl_buffer = reinterpret_cast<GLBufferWrapper *>(ptr); + delete gl_buffer; +} diff --git a/src/runtime/GLES_COMPUTE/GCTensor.cpp b/src/runtime/GLES_COMPUTE/GCTensor.cpp index edbd16dc1d..e193d26f0a 100644 --- a/src/runtime/GLES_COMPUTE/GCTensor.cpp +++ b/src/runtime/GLES_COMPUTE/GCTensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,7 +27,7 @@ using namespace arm_compute; GCTensor::GCTensor() - : _allocator() + : _allocator(this) { } diff --git a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp index 694b34f1ec..abd2b483d3 100644 --- a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp +++ b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. 
+ * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -31,11 +31,16 @@ using namespace arm_compute; -GCTensorAllocator::GCTensorAllocator() - : _gl_buffer(), _mapping(nullptr) +GCTensorAllocator::GCTensorAllocator(GCTensor *owner) + : _associated_memory_group(nullptr), _gl_buffer(), _mapping(nullptr), _owner(owner) { } +GCTensorAllocator::~GCTensorAllocator() +{ + _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>(); +} + uint8_t *GCTensorAllocator::data() { return _mapping; @@ -43,17 +48,35 @@ uint8_t *GCTensorAllocator::data() void GCTensorAllocator::allocate() { - _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>(); - ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); - ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(info().total_size()), nullptr, GL_STATIC_DRAW)); - ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + if(_associated_memory_group == nullptr) + { + _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>(); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(info().total_size()), nullptr, GL_STATIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + } + else + { + _associated_memory_group->finalize_memory(_owner, reinterpret_cast<void **>(&_gl_buffer), info().total_size()); + } info().set_is_resizable(false); } void GCTensorAllocator::free() { - _gl_buffer.reset(); - info().set_is_resizable(true); + if(_associated_memory_group == nullptr) + { + _gl_buffer.reset(); + info().set_is_resizable(true); + } +} + +void GCTensorAllocator::set_associated_memory_group(GCMemoryGroup *associated_memory_group) +{ + ARM_COMPUTE_ERROR_ON(associated_memory_group == nullptr); + ARM_COMPUTE_ERROR_ON(_associated_memory_group != nullptr); + ARM_COMPUTE_ERROR_ON(_gl_buffer.get() != nullptr); + 
_associated_memory_group = associated_memory_group; } uint8_t *GCTensorAllocator::lock() diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index 5689722340..f4c073668a 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -91,9 +91,9 @@ void GCConvolutionLayerReshapeWeights::run() } } -GCConvolutionLayer::GCConvolutionLayer() - : _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(), _input_interleaved_reshaped(), - _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) +GCConvolutionLayer::GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(), + _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) { } @@ -196,6 +196,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig // FIXME: input->clone() doesn't work with subtensors for grouped convolutions. TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->info()->fixed_point_position()); _input_im2col_reshaped.allocator()->init(im2col_reshaped_info); + _memory_group.manage(&_input_im2col_reshaped); // Create tensor (interleave) to prepare input tensor for GEMM if(run_interleaved) @@ -207,6 +208,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig // FIXME: input->clone() doesn't work with subtensors for grouped convolutions. 
TensorInfo interleaved_info(shape_interleaved, 1, dt, input->info()->fixed_point_position()); _input_interleaved_reshaped.allocator()->init(interleaved_info); + _memory_group.manage(&_input_interleaved_reshaped); } // Create GEMM output tensor @@ -218,6 +220,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig // FIXME: input->clone() doesn't work with subtensors for grouped convolutions. TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position()); _gemm_output.allocator()->init(info_gemm); + _memory_group.manage(&_gemm_output); // Configure kernels if(dt == DataType::F16) @@ -263,6 +266,8 @@ void GCConvolutionLayer::run() _reshape_weights.run(); } + _memory_group.acquire(); + // Run im2col GCScheduler::get().dispatch(_fill_border); GCScheduler::get().memory_barrier(); @@ -282,4 +287,6 @@ void GCConvolutionLayer::run() GCScheduler::get().memory_barrier(); // Reshape output matrix GCScheduler::get().dispatch(_output_col2im_kernel, false); + + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp index 9e4f0f6c95..0f8f8e6c94 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -38,9 +38,9 @@ void GCFullyConnectedLayerReshapeWeights::configure(const IGCTensor *input, IGCT _kernel = std::move(k); } -GCFullyConnectedLayer::GCFullyConnectedLayer() - : _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), - _accumulate_biases(false) +GCFullyConnectedLayer::GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), 
_reshape_weights_output(), + _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false) { } @@ -61,6 +61,7 @@ void GCFullyConnectedLayer::configure_conv_fc(const IGCTensor *input, const IGCT _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt)); // Configure im2col kernel + _memory_group.manage(&_im2col_output); _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure matrix multiply kernel @@ -156,6 +157,7 @@ void GCFullyConnectedLayer::run() _reshape_weights_kernel.run(); } + _memory_group.acquire(); // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { @@ -177,4 +179,5 @@ void GCFullyConnectedLayer::run() GCScheduler::get().dispatch(_accumulate_biases_kernel); } + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp index 5122c20504..46424a59f5 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -40,8 +40,8 @@ using namespace arm_compute; using namespace arm_compute::gles_compute; -GCGEMM::GCGEMM() - : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false) +GCGEMM::GCGEMM(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false) { } @@ -88,6 +88,7 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor * TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position()); _tmp_a.allocator()->init(info_a); + _memory_group.manage(&_tmp_a); TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position()); _tmp_b.allocator()->init(info_b); @@ -118,6 +119,7 @@ void 
GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor * void GCGEMM::run() { + _memory_group.acquire(); if(_is_interleaved_transposed) { // Run interleave kernel @@ -137,4 +139,5 @@ void GCGEMM::run() GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_ma_kernel); } + _memory_group.release(); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp index fc3882dbda..13213d2b54 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -33,8 +33,8 @@ using namespace arm_compute; -GCNormalizationLayer::GCNormalizationLayer() - : _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler() +GCNormalizationLayer::GCNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler() { } @@ -43,6 +43,7 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, ARM_COMPUTE_ERROR_ON(input == nullptr); _squared_input.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, input->info()->data_type())); + _memory_group.manage(&_squared_input); _norm_kernel.configure(input, &_squared_input, output, norm_info); _multiply_kernel.configure(input, input, &_squared_input, 1.0f); @@ -55,9 +56,11 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, void GCNormalizationLayer::run() { + _memory_group.acquire(); GCScheduler::get().dispatch(_multiply_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_border_handler, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_norm_kernel, true); + _memory_group.release(); } diff --git 
a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp index 5221c5cc5d..1748a5952b 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -29,8 +29,8 @@ using namespace arm_compute; -GCSoftmaxLayer::GCSoftmaxLayer() - : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() +GCSoftmaxLayer::GCSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() { } @@ -50,6 +50,11 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float _max.allocator()->init(tensor_info_max_sum); _sum.allocator()->init(tensor_info_max_sum); + // Manage intermediate buffers + _memory_group.manage(&_tmp); + _memory_group.manage(&_max); + _memory_group.manage(&_sum); + // Configure Kernels _max_kernel.configure(input, &_max); _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); @@ -63,9 +68,13 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float void GCSoftmaxLayer::run() { + _memory_group.acquire(); + GCScheduler::get().dispatch(_max_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_shift_exp_sum_kernel, false); GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_norm_kernel); + + _memory_group.release(); } |