From baf174e85ddb5399355281cd34d0f459d92124a7 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 8 Sep 2017 19:47:30 +0100 Subject: COMPMID-485: Memory Manager Change-Id: Ib421b7622838f050038cd81e7426bb1413a7d6e6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/87376 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- arm_compute/core/CL/OpenCL.h | 2 + arm_compute/core/Helpers.inl | 8 +- arm_compute/runtime/Allocator.h | 45 ++++++ arm_compute/runtime/BlobLifetimeManager.h | 87 +++++++++++ arm_compute/runtime/BlobMemoryPool.h | 75 ++++++++++ arm_compute/runtime/CL/CLBufferAllocator.h | 51 +++++++ arm_compute/runtime/CL/CLMemoryGroup.h | 45 ++++++ arm_compute/runtime/CL/CLTensorAllocator.h | 25 +++- .../runtime/CL/functions/CLConvolutionLayer.h | 10 +- .../runtime/CL/functions/CLFullyConnectedLayer.h | 4 +- arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 7 +- arm_compute/runtime/IAllocator.h | 49 +++++++ arm_compute/runtime/ILifetimeManager.h | 80 +++++++++++ arm_compute/runtime/IMemoryGroup.h | 45 ++++++ arm_compute/runtime/IMemoryManager.h | 56 ++++++++ arm_compute/runtime/IMemoryPool.h | 64 +++++++++ arm_compute/runtime/IPoolManager.h | 58 ++++++++ arm_compute/runtime/MemoryGroup.h | 44 ++++++ arm_compute/runtime/MemoryGroupBase.h | 159 +++++++++++++++++++++ arm_compute/runtime/MemoryManagerOnDemand.h | 84 +++++++++++ .../runtime/NEON/functions/NEConvolutionLayer.h | 7 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 7 +- .../runtime/NEON/functions/NESoftmaxLayer.h | 4 +- arm_compute/runtime/PoolManager.h | 67 +++++++++ arm_compute/runtime/TensorAllocator.h | 25 +++- arm_compute/runtime/Types.h | 49 +++++++ scripts/clang_tidy_rules.py | 1 + src/core/CL/OpenCL.cpp | 15 ++ src/runtime/Allocator.cpp | 41 ++++++ src/runtime/BlobLifetimeManager.cpp | 149 +++++++++++++++++++ src/runtime/BlobMemoryPool.cpp | 99 +++++++++++++ src/runtime/CL/CLBufferAllocator.cpp | 49 +++++++ src/runtime/CL/CLTensor.cpp | 2 +- src/runtime/CL/CLTensorAllocator.cpp | 35 ++++- src/runtime/CL/functions/CLConvolutionLayer.cpp | 32 +++-- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 11 +- src/runtime/CL/functions/CLSoftmaxLayer.cpp | 14 +- src/runtime/MemoryManagerOnDemand.cpp | 88 ++++++++++++ src/runtime/NEON/functions/NEConvolutionLayer.cpp | 31 ++-- .../NEON/functions/NEFullyConnectedLayer.cpp | 21 ++- src/runtime/NEON/functions/NESoftmaxLayer.cpp | 13 +- src/runtime/PoolManager.cpp | 74 ++++++++++ src/runtime/Tensor.cpp | 2 +- src/runtime/TensorAllocator.cpp | 74 ++++++++-- support/Semaphore.h | 92 ++++++++++++ 45 files changed, 1929 insertions(+), 71 deletions(-) create mode 100644 arm_compute/runtime/Allocator.h create mode 100644 arm_compute/runtime/BlobLifetimeManager.h create mode 100644 arm_compute/runtime/BlobMemoryPool.h create mode 100644 arm_compute/runtime/CL/CLBufferAllocator.h create mode 100644 arm_compute/runtime/CL/CLMemoryGroup.h create mode 100644 arm_compute/runtime/IAllocator.h create mode 100644 arm_compute/runtime/ILifetimeManager.h create mode 100644 arm_compute/runtime/IMemoryGroup.h create mode 100644 arm_compute/runtime/IMemoryManager.h create mode 100644 arm_compute/runtime/IMemoryPool.h create mode 100644 arm_compute/runtime/IPoolManager.h create mode 100644 arm_compute/runtime/MemoryGroup.h create mode 100644 arm_compute/runtime/MemoryGroupBase.h create mode 100644 arm_compute/runtime/MemoryManagerOnDemand.h create mode 100644 arm_compute/runtime/PoolManager.h create mode 100644 arm_compute/runtime/Types.h create mode 100644 src/runtime/Allocator.cpp create mode 
100644 src/runtime/BlobLifetimeManager.cpp create mode 100644 src/runtime/BlobMemoryPool.cpp create mode 100644 src/runtime/CL/CLBufferAllocator.cpp create mode 100644 src/runtime/MemoryManagerOnDemand.cpp create mode 100644 src/runtime/PoolManager.cpp create mode 100644 support/Semaphore.h diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index bea9743f48..562f30bf14 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -57,6 +57,7 @@ public: using clBuildProgram_func = cl_int (*)(cl_program, cl_uint, const cl_device_id *, const char *, void (*pfn_notify)(cl_program, void *), void *); using clEnqueueNDRangeKernel_func = cl_int (*)(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *); using clSetKernelArg_func = cl_int (*)(cl_kernel, cl_uint, size_t, const void *); + using clRetainMemObject_func = cl_int (*)(cl_mem); using clReleaseMemObject_func = cl_int (*)(cl_mem); using clEnqueueUnmapMemObject_func = cl_int (*)(cl_command_queue, cl_mem, void *, cl_uint, const cl_event *, cl_event *); using clRetainCommandQueue_func = cl_int (*)(cl_command_queue command_queue); @@ -106,6 +107,7 @@ public: clReleaseContext_func clReleaseContext = nullptr; clRetainCommandQueue_func clRetainCommandQueue = nullptr; clEnqueueUnmapMemObject_func clEnqueueUnmapMemObject = nullptr; + clRetainMemObject_func clRetainMemObject = nullptr; clReleaseMemObject_func clReleaseMemObject = nullptr; clGetDeviceInfo_func clGetDeviceInfo = nullptr; clGetDeviceIDs_func clGetDeviceIDs = nullptr; diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index 90a4618fcc..e20bdb58a1 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -311,9 +311,13 @@ inline ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, con Coordinates anchor; anchor.set_num_dimensions(src_info.tensor_shape().num_dimensions()); TensorShape new_dst_shape(dst_shape); - anchor.set(0, (policy == InterpolationPolicy::BILINEAR && border_undefined) ? ((static_cast(src_info.valid_region().anchor[0]) + border_size.left + 0.5f) * wr - 0.5f) : + anchor.set(0, (policy == InterpolationPolicy::BILINEAR + && border_undefined) ? + ((static_cast(src_info.valid_region().anchor[0]) + border_size.left + 0.5f) * wr - 0.5f) : ((static_cast(src_info.valid_region().anchor[0]) + 0.5f) * wr - 0.5f)); - anchor.set(1, (policy == InterpolationPolicy::BILINEAR && border_undefined) ? ((static_cast(src_info.valid_region().anchor[1]) + border_size.top + 0.5f) * hr - 0.5f) : + anchor.set(1, (policy == InterpolationPolicy::BILINEAR + && border_undefined) ? + ((static_cast(src_info.valid_region().anchor[1]) + border_size.top + 0.5f) * hr - 0.5f) : ((static_cast(src_info.valid_region().anchor[1]) + 0.5f) * hr - 0.5f)); float shape_out_x = (policy == InterpolationPolicy::BILINEAR && border_undefined) ? diff --git a/arm_compute/runtime/Allocator.h b/arm_compute/runtime/Allocator.h new file mode 100644 index 0000000000..cf6f07b211 --- /dev/null +++ b/arm_compute/runtime/Allocator.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ALLOCATOR_H__ +#define __ARM_COMPUTE_ALLOCATOR_H__ + +#include "arm_compute/runtime/IAllocator.h" + +#include + +namespace arm_compute +{ +/** Default malloc allocator implementation */ +class Allocator : public IAllocator +{ +public: + /** Default constructor */ + Allocator() = default; + + // Inherited methods overridden: + void *allocate(size_t size, size_t alignment) override; + void free(void *ptr) override; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_ALLOCATOR_H__ */ diff --git a/arm_compute/runtime/BlobLifetimeManager.h b/arm_compute/runtime/BlobLifetimeManager.h new file mode 100644 index 0000000000..ec43f47fe6 --- /dev/null +++ b/arm_compute/runtime/BlobLifetimeManager.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
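The matching src/runtime/Allocator.cpp is part of this patch but sits outside this excerpt. A minimal sketch that satisfies the IAllocator contract declared above, assuming the default allocator simply forwards to the global heap and ignores the alignment hint, would be:

#include "arm_compute/runtime/Allocator.h"

#include <cstddef>
#include <cstdlib>

using namespace arm_compute;

void *Allocator::allocate(size_t size, size_t alignment)
{
    (void)alignment; // plain heap-backed sketch: the alignment hint is not honoured here
    return std::malloc(size);
}

void Allocator::free(void *ptr)
{
    std::free(ptr);
}

The contract, not the backing store, is the point: anything that can hand out and reclaim raw blocks (heap memory, OpenCL buffers, a pre-reserved arena) can back the pools introduced later in this patch.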
+ */ +#ifndef __ARM_COMPUTE_BLOBLIFETIMEMANAGER_H__ +#define __ARM_COMPUTE_BLOBLIFETIMEMANAGER_H__ + +#include "arm_compute/runtime/ILifetimeManager.h" + +#include "arm_compute/runtime/IMemoryGroup.h" +#include "arm_compute/runtime/Types.h" + +#include +#include +#include + +namespace arm_compute +{ +class IMemoryGroup; + +/** Class that tracks the lifetime of registered tensors and calculates the systems memory requirements in terms of blobs */ +class BlobLifetimeManager : public ILifetimeManager +{ +public: + /** Constructor */ + BlobLifetimeManager(); + /** Prevent instances of this class to be copy constructed */ + BlobLifetimeManager(const BlobLifetimeManager &) = delete; + /** Prevent instances of this class to be copied */ + BlobLifetimeManager &operator=(const BlobLifetimeManager &) = delete; + /** Allow instances of this class to be move constructed */ + BlobLifetimeManager(BlobLifetimeManager &&) = default; + /** Allow instances of this class to be moved */ + BlobLifetimeManager &operator=(BlobLifetimeManager &&) = default; + + // Inherited methods overridden: + void register_group(IMemoryGroup *group) override; + void start_lifetime(void *obj) override; + void end_lifetime(void *obj, void **handle, size_t size) override; + std::unique_ptr create_pool(IAllocator *allocator) override; + bool are_all_finalized() const override; + MappingType mapping_type() const override; + +private: + /** Update blobs and mappings */ + void update_blobs_and_mappings(); + +private: + /** Element struct */ + struct Element + { + Element(void *id_ = nullptr, void **handle_ = nullptr, size_t size_ = 0, bool status_ = false) + : id(id_), handle(handle_), size(size_), status(status_) + { + } + void *id; /**< Element id */ + void **handle; /**< Element's memory handle */ + size_t size; /**< Element's size */ + bool status; /**< Lifetime status */ + }; + + IMemoryGroup *_active_group; /**< Active group */ + std::vector _active_elements; /**< A map that contains the active elements */ + std::map> _finalized_groups; /**< A map that contains the finalized groups */ + std::vector _blobs; +}; +} // arm_compute +#endif /* __ARM_COMPUTE_BLOBLIFETIMEMANAGER_H__ */ diff --git a/arm_compute/runtime/BlobMemoryPool.h b/arm_compute/runtime/BlobMemoryPool.h new file mode 100644 index 0000000000..f2be2dd8df --- /dev/null +++ b/arm_compute/runtime/BlobMemoryPool.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
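The bookkeeping above feeds a simple sizing rule: once every lifetime in the active group has ended, the group's element sizes are folded into the running blob requirements, taking an element-wise maximum so that one set of blobs can serve whichever group is executing. The actual logic lives in src/runtime/BlobLifetimeManager.cpp, outside this excerpt; a standalone sketch of that aggregation step, using a hypothetical helper name, is:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

// merge_blob_sizes is a hypothetical helper, not part of the library API.
std::vector<size_t> merge_blob_sizes(std::vector<size_t> group_sizes, std::vector<size_t> blob_sizes)
{
    // Largest requirement first, so blob i always serves the i-th largest tensor of any group
    std::sort(std::begin(group_sizes), std::end(group_sizes), std::greater<size_t>());
    if(blob_sizes.size() < group_sizes.size())
    {
        blob_sizes.resize(group_sizes.size(), 0);
    }
    for(size_t i = 0; i < group_sizes.size(); ++i)
    {
        blob_sizes[i] = std::max(blob_sizes[i], group_sizes[i]);
    }
    return blob_sizes;
}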
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_BLOBMEMORYPOOL_H__ +#define __ARM_COMPUTE_BLOBMEMORYPOOL_H__ + +#include "arm_compute/runtime/IMemoryPool.h" + +#include "arm_compute/runtime/IAllocator.h" +#include "arm_compute/runtime/Types.h" + +#include +#include + +namespace arm_compute +{ +/** Blob memory pool */ +class BlobMemoryPool : public IMemoryPool +{ +public: + /** Default Constructor */ + BlobMemoryPool(IAllocator *allocator, std::vector blob_sizes); + /** Default Destructor */ + ~BlobMemoryPool(); + /** Prevent instances of this class to be copy constructed */ + BlobMemoryPool(const BlobMemoryPool &) = delete; + /** Prevent instances of this class to be copy assigned */ + BlobMemoryPool &operator=(const BlobMemoryPool &) = delete; + /** Allow instances of this class to be move constructed */ + BlobMemoryPool(BlobMemoryPool &&) = default; + /** Allow instances of this class to be move assigned */ + BlobMemoryPool &operator=(BlobMemoryPool &&) = default; + + // Inherited methods overridden: + void acquire(MemoryMappings &handles) override; + void release(MemoryMappings &handles) override; + MappingType mapping_type() const override; + std::unique_ptr duplicate() override; + +private: + /** Allocates internal blobs + * + * @param sizes Size of each blob + */ + void allocate_blobs(const std::vector &sizes); + /** Frees blobs **/ + void free_blobs(); + +private: + IAllocator *_allocator; /**< Allocator to use for internal allocation */ + std::vector _blobs; /**< Vector holding all the memory blobs */ + std::vector _blob_sizes; /**< Sizes of each blob */ +}; +} // arm_compute +#endif /* __ARM_COMPUTE_BLOBMEMORYPOOL_H__ */ diff --git a/arm_compute/runtime/CL/CLBufferAllocator.h b/arm_compute/runtime/CL/CLBufferAllocator.h new file mode 100644 index 0000000000..05b0363dc3 --- /dev/null +++ b/arm_compute/runtime/CL/CLBufferAllocator.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
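acquire() and release() operate on a group's MemoryMappings: each entry maps a tensor's memory handle (a void **) to the index of the blob the lifetime manager pre-assigned to it. The member implementations are in src/runtime/BlobMemoryPool.cpp, outside this excerpt; a free-standing sketch of the semantics, written as hypothetical helper functions, is:

#include <cstddef>
#include <map>
#include <vector>

// Mirrors the MemoryMappings alias from arm_compute/runtime/Types.h: handle -> blob index.
using MemoryMappings = std::map<void **, size_t>;

void acquire_blobs(MemoryMappings &handles, const std::vector<void *> &blobs)
{
    for(auto &handle : handles)
    {
        *handle.first = blobs[handle.second]; // back each tensor with its pre-pinned blob
    }
}

void release_blobs(MemoryMappings &handles)
{
    for(auto &handle : handles)
    {
        *handle.first = nullptr; // tensors lose their backing until the next acquire
    }
}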
+ */ +#ifndef __ARM_COMPUTE_CLBUFFERALLOCATOR_H__ +#define __ARM_COMPUTE_CLBUFFERALLOCATOR_H__ + +#include "arm_compute/runtime/IAllocator.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include + +namespace arm_compute +{ +/** Default OpenCL cl buffer allocator implementation */ +class CLBufferAllocator : public IAllocator +{ +public: + /** Default constructor */ + explicit CLBufferAllocator(cl::Context context = CLScheduler::get().context()); + + // Inherited methods overridden: + void *allocate(size_t size, size_t alignment) override; + void free(void *ptr) override; + +private: + cl::Context _context; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_CLBUFFERALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/CLMemoryGroup.h b/arm_compute/runtime/CL/CLMemoryGroup.h new file mode 100644 index 0000000000..a6f3eb1c3c --- /dev/null +++ b/arm_compute/runtime/CL/CLMemoryGroup.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMEMORYGROUP_H__ +#define __ARM_COMPUTE_CLMEMORYGROUP_H__ + +#include "arm_compute/runtime/MemoryGroupBase.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +using CLMemoryGroup = MemoryGroupBase; + +template <> +inline void MemoryGroupBase::associate_memory_group(CLTensor *obj) +{ + ARM_COMPUTE_ERROR_ON(obj == nullptr); + auto allocator = dynamic_cast(obj->allocator()); + ARM_COMPUTE_ERROR_ON(allocator == nullptr); + allocator->set_associated_memory_group(this); +} +} // arm_compute +#endif /*__ARM_COMPUTE_CLMEMORYGROUP_H__ */ diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h index ed371e0642..682de174a8 100644 --- a/arm_compute/runtime/CL/CLTensorAllocator.h +++ b/arm_compute/runtime/CL/CLTensorAllocator.h @@ -24,19 +24,27 @@ #ifndef __ARM_COMPUTE_CLTENSORALLOCATOR_H__ #define __ARM_COMPUTE_CLTENSORALLOCATOR_H__ -#include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "arm_compute/core/CL/OpenCL.h" + #include namespace arm_compute { +class CLTensor; +template +class MemoryGroupBase; +using CLMemoryGroup = MemoryGroupBase; + /** Basic implementation of a CL memory tensor allocator. */ class CLTensorAllocator : public ITensorAllocator { public: /** Default constructor. 
*/ - CLTensorAllocator(); + CLTensorAllocator(CLTensor *owner = nullptr); + /** Default destructor */ + ~CLTensorAllocator(); /** Prevent instances of this class from being copied (As this class contains pointers). */ CLTensorAllocator(const CLTensorAllocator &) = delete; /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ @@ -45,8 +53,6 @@ public: CLTensorAllocator(CLTensorAllocator &&) = default; /** Allow instances of this class to be moved */ CLTensorAllocator &operator=(CLTensorAllocator &&) = default; - /** Default destructor */ - ~CLTensorAllocator() = default; /** Interface to be implemented by the child class to return the pointer to the mapped data. */ uint8_t *data(); @@ -85,6 +91,11 @@ public: * */ void free() override; + /** Associates the tensor with a memory group + * + * @param[in] associated_memory_group Memory group to associate the tensor with + */ + void set_associated_memory_group(CLMemoryGroup *associated_memory_group); protected: /** Call map() on the OpenCL buffer. @@ -96,8 +107,10 @@ protected: void unlock() override; private: - cl::Buffer _buffer; /**< OpenCL buffer containing the tensor data. */ - uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */ + CLMemoryGroup *_associated_memory_group; /**< Registered memory manager */ + cl::Buffer _buffer; /**< OpenCL buffer containing the tensor data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */ + CLTensor *_owner; /**< Owner of the allocator */ }; } #endif /* __ARM_COMPUTE_CLTENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index 2057b6ff8a..cd1ea70a23 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -34,7 +34,11 @@ #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" #include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include namespace arm_compute { @@ -48,7 +52,7 @@ class CLConvolutionLayerReshapeWeights : public IFunction { public: /** Constructor */ - CLConvolutionLayerReshapeWeights(); + CLConvolutionLayerReshapeWeights(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/QS16/F16/F32. @@ -62,6 +66,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLWeightsReshapeKernel _weights_reshape_kernel; CLGEMMTranspose1xWKernel _weights_transposed_kernel; CLTensor _weights_reshaped; @@ -81,7 +86,7 @@ class CLConvolutionLayer : public IFunction { public: /** Default constructor */ - CLConvolutionLayer(); + CLConvolutionLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -101,6 +106,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLConvolutionLayerReshapeWeights _reshape_weights; CLIm2ColKernel _input_im2col_kernel; CLGEMMInterleave4x4Kernel _input_interleave_kernel; diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index e076f51b26..f71e2a33f9 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -30,6 +30,7 @@ #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" namespace arm_compute @@ -64,7 +65,7 @@ class CLFullyConnectedLayer : public IFunction { public: /** Constructor */ - CLFullyConnectedLayer(); + CLFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QS8/QS16/F16/F32. @@ -83,6 +84,7 @@ private: void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + CLMemoryGroup _memory_group; CLIm2ColKernel _im2col_kernel; CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel; CLGEMMMatrixMultiplyKernel _mm_kernel; diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index 18f7a02a3e..70a265c1ae 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -25,8 +25,12 @@ #define __ARM_COMPUTE_CLSOFTMAXLAYER_H__ #include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include namespace arm_compute { @@ -46,7 +50,7 @@ class CLSoftmaxLayer : public IFunction { public: /** Constructor */ - CLSoftmaxLayer(); + CLSoftmaxLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32 @@ -58,6 +62,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLLogits1DMaxKernel _max_kernel; CLLogits1DShiftExpSumKernel _shift_exp_sum_kernel; CLLogits1DNormKernel _norm_kernel; diff --git a/arm_compute/runtime/IAllocator.h b/arm_compute/runtime/IAllocator.h new file mode 100644 index 0000000000..3edb34a9ea --- /dev/null +++ b/arm_compute/runtime/IAllocator.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
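Each of these functions now owns a CLMemoryGroup built from the memory manager passed at construction, and the corresponding .cpp changes (listed in the diffstat, outside this excerpt) all follow the same pattern: manage() every intermediate tensor before configuring the kernels that use it, allocate() it afterwards so the group can finalize its memory, and bracket run() with acquire()/release(). A sketch in the shape of CLSoftmaxLayer, with kernel configuration elided and the intermediate tensor members (_tmp, _max, _sum) assumed from the pre-existing header, looks like this:

#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

#include "arm_compute/runtime/CL/CLScheduler.h"

#include <utility>

using namespace arm_compute;

CLSoftmaxLayer::CLSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp()
{
}

void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output)
{
    // Let the group track the intermediate tensors before any memory is requested
    _memory_group.manage(&_tmp);
    _memory_group.manage(&_max);
    _memory_group.manage(&_sum);

    // ... configure _max_kernel, _shift_exp_sum_kernel and _norm_kernel here ...

    // Allocating ends each tensor's lifetime and records its size with the lifetime manager
    _tmp.allocator()->allocate();
    _max.allocator()->allocate();
    _sum.allocator()->allocate();
}

void CLSoftmaxLayer::run()
{
    _memory_group.acquire(); // borrow a pool and back every managed intermediate

    CLScheduler::get().enqueue(_max_kernel, false);
    CLScheduler::get().enqueue(_shift_exp_sum_kernel, false);
    CLScheduler::get().enqueue(_norm_kernel);

    _memory_group.release(); // hand the pool back for other groups to reuse
}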
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IALLOCATOR_H__ +#define __ARM_COMPUTE_IALLOCATOR_H__ + +#include + +namespace arm_compute +{ +/** Allocator interface */ +class IAllocator +{ +public: + /** Default virtual destructor. */ + virtual ~IAllocator() = default; + /** Interface to be implemented by the child class to allocate bytes + * + * @param[in] size Size to allocate + * @param[in] alignment Alignment that the returned pointer should comply with + * + * @return A pointer to the allocated memory + */ + virtual void *allocate(size_t size, size_t alignment) = 0; + /** Interface to be implemented by the child class to free the allocated tensor */ + virtual void free(void *ptr) = 0; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_IALLOCATOR_H__ */ diff --git a/arm_compute/runtime/ILifetimeManager.h b/arm_compute/runtime/ILifetimeManager.h new file mode 100644 index 0000000000..4f9af6f535 --- /dev/null +++ b/arm_compute/runtime/ILifetimeManager.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
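This is the interface that the CLBufferAllocator declared earlier implements on top of OpenCL, and it is also why the clRetainMemObject/clReleaseMemObject entry points are wired into the dispatch table in this patch: the returned void * can carry a cl_mem handle. A sketch under that assumption follows; the real src/runtime/CL/CLBufferAllocator.cpp is outside this excerpt and the cl_mem_flags chosen here are a guess.

#include "arm_compute/runtime/CL/CLBufferAllocator.h"

#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"

using namespace arm_compute;

void *CLBufferAllocator::allocate(size_t size, size_t alignment)
{
    (void)alignment; // OpenCL decides buffer alignment
    cl_mem buf = clCreateBuffer(_context.get(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size, nullptr, nullptr);
    return static_cast<void *>(buf);
}

void CLBufferAllocator::free(void *ptr)
{
    ARM_COMPUTE_ERROR_ON(ptr == nullptr);
    clReleaseMemObject(static_cast<cl_mem>(ptr));
}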
+ */ +#ifndef __ARM_COMPUTE_ILIFETIMEMANAGER_H__ +#define __ARM_COMPUTE_ILIFETIMEMANAGER_H__ + +#include "arm_compute/runtime/IMemoryPool.h" +#include "arm_compute/runtime/Types.h" + +#include +#include + +namespace arm_compute +{ +class IMemoryGroup; +class IAllocator; + +/** Interface for managing the lifetime of objects */ +class ILifetimeManager +{ +public: + /** Virtual Destructor */ + virtual ~ILifetimeManager() = default; + /** Registers a group to the lifetime manager and assigns a group id + * + * @return The group id of the group + */ + virtual void register_group(IMemoryGroup *group) = 0; + /** Registers and starts lifetime of an object + * + * @param[in] obj Object to register + */ + virtual void start_lifetime(void *obj) = 0; + /** Ends lifetime of an object + * + * @param[in] obj Object + * @param[in] handle Memory handle of the object + * @param[in] size Size of the given object at given time + */ + virtual void end_lifetime(void *obj, void **handle, size_t size) = 0; + /** Creates a memory pool depending on the memory requirements + * + * @param allocator Allocator to use + * + * @return A memory pool + */ + virtual std::unique_ptr create_pool(IAllocator *allocator) = 0; + /** Checks if the lifetime of the registered object is complete + * + * @return True if all object lifetimes are finalized else false. + */ + virtual bool are_all_finalized() const = 0; + /** Returns the type of mappings that the lifetime manager returns + * + * @return Mapping type of the lifetime manager + */ + virtual MappingType mapping_type() const = 0; +}; +} // arm_compute +#endif /* __ARM_COMPUTE_ILIFETIMEMANAGER_H__ */ diff --git a/arm_compute/runtime/IMemoryGroup.h b/arm_compute/runtime/IMemoryGroup.h new file mode 100644 index 0000000000..be03ea4a01 --- /dev/null +++ b/arm_compute/runtime/IMemoryGroup.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IMEMORYGROUP_H__ +#define __ARM_COMPUTE_IMEMORYGROUP_H__ + +#include "arm_compute/runtime/Types.h" + +namespace arm_compute +{ +/** Memory group interface */ +class IMemoryGroup +{ +public: + /** Default virtual destructor */ + virtual ~IMemoryGroup() = default; + /** Acquires backing memory for the whole group */ + virtual void acquire() = 0; + /** Releases backing memory of the whole group */ + virtual void release() = 0; + /** Gets the memory mapping of the group */ + virtual MemoryMappings &mappings() = 0; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_IMEMORYGROUP_H__ */ diff --git a/arm_compute/runtime/IMemoryManager.h b/arm_compute/runtime/IMemoryManager.h new file mode 100644 index 0000000000..00aa566a50 --- /dev/null +++ b/arm_compute/runtime/IMemoryManager.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IMEMORYMANAGER_H__ +#define __ARM_COMPUTE_IMEMORYMANAGER_H__ + +#include "arm_compute/runtime/ILifetimeManager.h" +#include "arm_compute/runtime/IPoolManager.h" + +#include + +namespace arm_compute +{ +class IMemoryGroup; + +/** Memory manager interface to handle allocations of backing memory */ +class IMemoryManager +{ +public: + /** Default virtual destructor */ + virtual ~IMemoryManager() = default; + /** Returns the lifetime manager used by the memory manager + * + * @return The lifetime manager + */ + virtual ILifetimeManager *lifetime_manager() = 0; + /** Returns the pool manager used by the memory manager + * + * @return The pool manager + */ + virtual IPoolManager *pool_manager() = 0; + /** Finalize memory manager */ + virtual void finalize() = 0; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_IMEMORYMANAGER_H__ */ diff --git a/arm_compute/runtime/IMemoryPool.h b/arm_compute/runtime/IMemoryPool.h new file mode 100644 index 0000000000..aee6ad274b --- /dev/null +++ b/arm_compute/runtime/IMemoryPool.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IMEMORYPOOL_H__ +#define __ARM_COMPUTE_IMEMORYPOOL_H__ + +#include "arm_compute/runtime/Types.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +/** Memory Pool Inteface */ +class IMemoryPool +{ +public: + /** Default Virtual Destructor */ + virtual ~IMemoryPool() = default; + /** Sets occupant to the memory pool + * + * @param[in] handles A vector of pairs (handle, index) + */ + virtual void acquire(MemoryMappings &handles) = 0; + /** Releases a memory block + * + * @param[in] handles A vector containing a pair of handles and indices + */ + virtual void release(MemoryMappings &handles) = 0; + /** Returns the mapping types that this pool accepts + * + * @return the mapping type of the memory + */ + virtual MappingType mapping_type() const = 0; + /** Duplicates the existing memory pool + * + * @return A duplicate of the existing pool + */ + virtual std::unique_ptr duplicate() = 0; +}; +} // arm_compute +#endif /* __ARM_COMPUTE_IMEMORYPOOL_H__ */ diff --git a/arm_compute/runtime/IPoolManager.h b/arm_compute/runtime/IPoolManager.h new file mode 100644 index 0000000000..03cccdd605 --- /dev/null +++ b/arm_compute/runtime/IPoolManager.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
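duplicate() exists so the memory manager can stamp out several identical pools and let that many memory groups be in flight at once. MemoryManagerOnDemand, declared further down, brings the pieces together; a sketch of how its finalize() step might use this interface (the real src/runtime/MemoryManagerOnDemand.cpp is outside this excerpt) is:

#include "arm_compute/runtime/MemoryManagerOnDemand.h"

#include "arm_compute/core/Error.h"

#include <utility>

using namespace arm_compute;

// Sketch only: build one pool from the finalized lifetime information, then clone it.
void MemoryManagerOnDemand::finalize()
{
    ARM_COMPUTE_ERROR_ON(_num_pools == 0);
    ARM_COMPUTE_ERROR_ON(_allocator == nullptr);
    ARM_COMPUTE_ERROR_ON(!lifetime_manager()->are_all_finalized());

    auto pool = lifetime_manager()->create_pool(_allocator);
    for(unsigned int i = 1; i < _num_pools; ++i)
    {
        pool_manager()->register_pool(pool->duplicate());
    }
    pool_manager()->register_pool(std::move(pool));

    _is_finalized = true;
}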
+ */ +#ifndef __ARM_COMPUTE_IPOOLMANAGER_H__ +#define __ARM_COMPUTE_IPOOLMANAGER_H__ + +#include + +namespace arm_compute +{ +class IMemoryPool; + +/** Memory pool manager interface */ +class IPoolManager +{ +public: + /** Default virtual destructor */ + virtual ~IPoolManager() = default; + /** Locks a pool for execution + * + * @return Locked pool that workload will be mapped on + */ + virtual IMemoryPool *lock_pool() = 0; + /** Releases memory pool + * + * @param[in] pool Memory pool to release + */ + virtual void unlock_pool(IMemoryPool *pool) = 0; + /** Register pool to be managed by the pool + * + * @note Ownership of the pools is being transferred to the pool manager + * + * @param[in] pool Pool to be managed + */ + virtual void register_pool(std::unique_ptr pool) = 0; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_IPOOLMANAGER_H__ */ diff --git a/arm_compute/runtime/MemoryGroup.h b/arm_compute/runtime/MemoryGroup.h new file mode 100644 index 0000000000..d3f647e7d4 --- /dev/null +++ b/arm_compute/runtime/MemoryGroup.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MEMORYGROUP_H__ +#define __ARM_COMPUTE_MEMORYGROUP_H__ + +#include "arm_compute/runtime/MemoryGroupBase.h" + +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +using MemoryGroup = MemoryGroupBase; + +template <> +inline void MemoryGroupBase::associate_memory_group(Tensor *obj) +{ + ARM_COMPUTE_ERROR_ON(obj == nullptr); + auto allocator = dynamic_cast(obj->allocator()); + ARM_COMPUTE_ERROR_ON(allocator == nullptr); + allocator->set_associated_memory_group(this); +} +} // arm_compute +#endif /*__ARM_COMPUTE_MEMORYGROUP_H__ */ diff --git a/arm_compute/runtime/MemoryGroupBase.h b/arm_compute/runtime/MemoryGroupBase.h new file mode 100644 index 0000000000..ab8acb3494 --- /dev/null +++ b/arm_compute/runtime/MemoryGroupBase.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MEMORYGROUPBASE_H__ +#define __ARM_COMPUTE_MEMORYGROUPBASE_H__ + +#include "arm_compute/runtime/IMemoryGroup.h" + +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IMemoryPool.h" + +#include +#include + +namespace arm_compute +{ +/** Memory group */ +template +class MemoryGroupBase : public IMemoryGroup +{ +public: + /** Default Constructor */ + MemoryGroupBase(std::shared_ptr memory_manager = nullptr); + /** Default destructor */ + ~MemoryGroupBase() = default; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + MemoryGroupBase(const MemoryGroupBase &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). 
*/ + MemoryGroupBase &operator=(const MemoryGroupBase &) = delete; + /** Allow instances of this class to be moved */ + MemoryGroupBase(MemoryGroupBase &&) = default; + /** Allow instances of this class to be moved */ + MemoryGroupBase &operator=(MemoryGroupBase &&) = default; + /** Sets a object to be managed by the given memory group + * + * @note Manager must not be finalized + * + * @param[in] obj Object to be managed + */ + void manage(TensorType *obj); + /** Finalizes memory for a given object + * + * @note Manager must not be finalized + * + * @param[in] obj Object to request memory for + * @param[in] handle Handle to store the memory + * @param[in] size Size of memory to allocate + */ + void finalize_memory(TensorType *obj, void **handle, size_t size); + + // Inherited methods overridden: + void acquire() override; + void release() override; + MemoryMappings &mappings() override; + +private: + void associate_memory_group(TensorType *obj); + +private: + std::shared_ptr _memory_manager; /**< Memory manager to be used by the group */ + IMemoryPool *_pool; /**< Memory pool that the group is scheduled with */ + MemoryMappings _mappings; /**< Memory mappings of the group */ +}; + +template +inline MemoryGroupBase::MemoryGroupBase(std::shared_ptr memory_manager) + : _memory_manager(std::move(memory_manager)), _pool(nullptr), _mappings() +{ + if(_memory_manager) + { + ARM_COMPUTE_ERROR_ON(!_memory_manager->lifetime_manager()); + } +} + +template +inline void MemoryGroupBase::manage(TensorType *obj) +{ + if(_memory_manager) + { + ARM_COMPUTE_ERROR_ON(!_memory_manager->lifetime_manager()); + + // Defer registration to the first managed object + _memory_manager->lifetime_manager()->register_group(this); + + // Associate this memory group with the tensor + associate_memory_group(obj); + + // Start object lifetime + _memory_manager->lifetime_manager()->start_lifetime(obj); + } +} + +template +inline void MemoryGroupBase::finalize_memory(TensorType *obj, void **handle, size_t size) +{ + if(_memory_manager) + { + ARM_COMPUTE_ERROR_ON(!_memory_manager->lifetime_manager()); + _memory_manager->lifetime_manager()->end_lifetime(obj, handle, size); + } +} + +template +inline void MemoryGroupBase::acquire() +{ + if(!_mappings.empty()) + { + ARM_COMPUTE_ERROR_ON(!_memory_manager->pool_manager()); + _pool = _memory_manager->pool_manager()->lock_pool(); + _pool->acquire(_mappings); + } +} + +template +inline void MemoryGroupBase::release() +{ + if(_pool != nullptr) + { + ARM_COMPUTE_ERROR_ON(!_memory_manager->pool_manager()); + ARM_COMPUTE_ERROR_ON(_mappings.empty()); + _pool->release(_mappings); + _memory_manager->pool_manager()->unlock_pool(_pool); + _pool = nullptr; + } +} + +template +inline MemoryMappings &MemoryGroupBase::mappings() +{ + return _mappings; +} + +template +inline void MemoryGroupBase::associate_memory_group(TensorType *) +{ + ARM_COMPUTE_ERROR("Must be implemented by child class"); +} +} // arm_compute +#endif /*__ARM_COMPUTE_MEMORYGROUPBASE_H__ */ diff --git a/arm_compute/runtime/MemoryManagerOnDemand.h b/arm_compute/runtime/MemoryManagerOnDemand.h new file mode 100644 index 0000000000..ad4b831e1f --- /dev/null +++ b/arm_compute/runtime/MemoryManagerOnDemand.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017 ARM Limited. 
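finalize_memory() above is the callback target for the allocators: when a managed tensor calls allocate(), its allocator hands the memory handle and size to the group instead of touching the heap, and the handle only gets backed once the group acquires a pool. A sketch of that side of the contract, consistent with the _associated_memory_group and _owner members this patch adds to TensorAllocator (the real src/runtime/TensorAllocator.cpp changes appear further down and may differ):

#include "arm_compute/runtime/TensorAllocator.h"

#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/MemoryGroup.h"

#include <cstdint>

using namespace arm_compute;

void TensorAllocator::allocate()
{
    ARM_COMPUTE_ERROR_ON(_buffer != nullptr);

    if(_associated_memory_group == nullptr)
    {
        _buffer = new uint8_t[info().total_size()](); // unmanaged tensor: allocate immediately
    }
    else
    {
        // Managed tensor: end its lifetime and let the pool back _buffer at acquire() time
        _associated_memory_group->finalize_memory(_owner, reinterpret_cast<void **>(&_buffer), info().total_size());
    }
}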
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MEMORYMANAGERONDEMAND_H__ +#define __ARM_COMPUTE_MEMORYMANAGERONDEMAND_H__ + +#include "arm_compute/runtime/IMemoryManager.h" + +#include "IAllocator.h" +#include "arm_compute/runtime/ILifetimeManager.h" +#include "arm_compute/runtime/IMemoryGroup.h" +#include "arm_compute/runtime/IPoolManager.h" + +#include +#include + +namespace arm_compute +{ +class IAllocator; + +/** On-demand memory manager */ +class MemoryManagerOnDemand : public IMemoryManager +{ +public: + /** Default Constructor */ + MemoryManagerOnDemand(std::shared_ptr lifetime_manager, std::shared_ptr pool_manager); + /** Prevent instances of this class to be copy constructed */ + MemoryManagerOnDemand(const MemoryManagerOnDemand &) = delete; + /** Prevent instances of this class to be copied */ + MemoryManagerOnDemand &operator=(const MemoryManagerOnDemand &) = delete; + /** Allow instances of this class to be move constructed */ + MemoryManagerOnDemand(MemoryManagerOnDemand &&) = default; + /** Allow instances of this class to be moved */ + MemoryManagerOnDemand &operator=(MemoryManagerOnDemand &&) = default; + /** Sets the number of pools to create + * + * @param[in] num_pools Number of pools + */ + void set_num_pools(unsigned int num_pools); + /** Sets the allocator to be used for configuring the pools + * + * @param[in] allocator Allocator to use + */ + void set_allocator(IAllocator *allocator); + /** Checks if the memory manager has been finalized + * + * @return True if the memory manager has been finalized else false + */ + bool is_finalized() const; + + // Inherited methods overridden: + ILifetimeManager *lifetime_manager() override; + IPoolManager *pool_manager() override; + void finalize() override; + +private: + std::shared_ptr _lifetime_mgr; /**< Lifetime manager */ + std::shared_ptr _pool_mgr; /**< Memory pool manager */ + IAllocator *_allocator; /**< Allocator used for backend allocations */ + bool _is_finalized; /**< Flag that notes if the memory manager has been finalized */ + unsigned int _num_pools; /**< Number of pools to create */ +}; +} // arm_compute +#endif /*__ARM_COMPUTE_MEMORYMANAGERONDEMAND_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index 1bd7e6a95f..8e040b3055 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ 
b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -34,6 +34,7 @@ #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -48,7 +49,7 @@ class NEConvolutionLayerReshapeWeights : public IFunction { public: /** Constructor */ - NEConvolutionLayerReshapeWeights(); + NEConvolutionLayerReshapeWeights(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/QS16/F32. @@ -62,6 +63,7 @@ public: void run() override; private: + MemoryGroup _memory_group; NEWeightsReshapeKernel _weights_reshape_kernel; NEGEMMTranspose1xWKernel _weights_transposed_kernel; Tensor _weights_reshaped; @@ -79,7 +81,7 @@ class NEConvolutionLayer : public IFunction { public: /** Constructor */ - NEConvolutionLayer(); + NEConvolutionLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -98,6 +100,7 @@ public: void run() override; private: + MemoryGroup _memory_group; NEIm2ColKernel _input_im2col_kernel; NEGEMMInterleave4x4Kernel _input_interleave_kernel; NEConvolutionLayerReshapeWeights _reshape_weights; diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 08099b8539..463a7d53e3 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -32,6 +32,7 @@ #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -47,7 +48,7 @@ class NEFullyConnectedLayerReshapeWeights : public IFunction { public: /** Constructor */ - NEFullyConnectedLayerReshapeWeights(); + NEFullyConnectedLayerReshapeWeights(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/QS16/F32. @@ -61,6 +62,7 @@ public: void run() override; private: + MemoryGroup _memory_group; NETransposeKernel _transpose_kernel; NEGEMMTranspose1xWKernel _transpose1xW_kernel; Tensor _transpose_output; @@ -81,7 +83,7 @@ class NEFullyConnectedLayer : public IFunction { public: /** Constructor */ - NEFullyConnectedLayer(); + NEFullyConnectedLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QS8/QS16/F32. 
@@ -97,6 +99,7 @@ public: void run() override; private: + MemoryGroup _memory_group; NEIm2ColKernel _im2col_kernel; NEFullyConnectedLayerReshapeWeights _reshape_weights_kernel; NEGEMMInterleave4x4Kernel _interleave4x4_kernel; diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index 01402aee63..a265f70043 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -27,6 +27,7 @@ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute @@ -47,7 +48,7 @@ class NESoftmaxLayer : public IFunction { public: /** Constructor */ - NESoftmaxLayer(); + NESoftmaxLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32. @@ -59,6 +60,7 @@ public: void run() override; private: + MemoryGroup _memory_group; NELogits1DMaxKernel _max_kernel; NELogits1DShiftExpSumKernel _shift_exp_sum_kernel; NELogits1DNormKernel _norm_kernel; diff --git a/arm_compute/runtime/PoolManager.h b/arm_compute/runtime/PoolManager.h new file mode 100644 index 0000000000..6549350e8f --- /dev/null +++ b/arm_compute/runtime/PoolManager.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
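Putting the pieces together on the NEON side: a caller builds a lifetime manager and a pool manager, wraps them in a MemoryManagerOnDemand, hands that manager to the functions whose constructors were extended above, and finalizes once everything is configured. A usage sketch (tensor setup and configure() calls elided):

#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/PoolManager.h"

#include <memory>

using namespace arm_compute;

int main()
{
    auto lifetime_mgr   = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr       = std::make_shared<PoolManager>();
    auto memory_manager = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

    // Functions that share one manager reuse the same backing blobs across their run() calls
    NEFullyConnectedLayer fc(memory_manager);
    NESoftmaxLayer        sm(memory_manager);

    // ... configure fc and sm and allocate their input/output tensors here ...

    // Once every managed tensor has been allocated, create the pools
    Allocator cpu_allocator;
    memory_manager->set_allocator(&cpu_allocator);
    memory_manager->set_num_pools(1);
    memory_manager->finalize();

    // fc.run();  // acquires a pool, runs, releases it
    // sm.run();
    return 0;
}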
+ */ +#ifndef __ARM_COMPUTE_POOLMANAGER_H__ +#define __ARM_COMPUTE_POOLMANAGER_H__ + +#include "arm_compute/runtime/IPoolManager.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/IMemoryPool.h" +#include "support/Mutex.h" +#include "support/Semaphore.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Memory pool manager */ +class PoolManager : public IPoolManager +{ +public: + /** Default Constructor */ + PoolManager(); + /** Prevent instances of this class to be copy constructed */ + PoolManager(const PoolManager &) = delete; + /** Prevent instances of this class to be copied */ + PoolManager &operator=(const PoolManager &) = delete; + /** Allow instances of this class to be move constructed */ + PoolManager(PoolManager &&) = default; + /** Allow instances of this class to be moved */ + PoolManager &operator=(PoolManager &&) = default; + + // Inherited methods overridden: + IMemoryPool *lock_pool() override; + void unlock_pool(IMemoryPool *pool) override; + void register_pool(std::unique_ptr pool) override; + +private: + std::list> _free_pools; /**< List of free pools */ + std::list> _occupied_pools; /**< List of occupied pools */ + std::unique_ptr _sem; /**< Semaphore to control the queues */ + arm_compute::Mutex _mtx; /**< Mutex to control access to the queues */ +}; +} // arm_compute +#endif /*__ARM_COMPUTE_POOLMANAGER_H__ */ diff --git a/arm_compute/runtime/TensorAllocator.h b/arm_compute/runtime/TensorAllocator.h index 450323b3ab..40704c0a17 100644 --- a/arm_compute/runtime/TensorAllocator.h +++ b/arm_compute/runtime/TensorAllocator.h @@ -34,13 +34,27 @@ namespace arm_compute { class Coordinates; class TensorInfo; +class Tensor; +template +class MemoryGroupBase; +using MemoryGroup = MemoryGroupBase; /** Basic implementation of a CPU memory tensor allocator. */ class TensorAllocator : public ITensorAllocator { public: /** Default constructor. */ - TensorAllocator(); + TensorAllocator(Tensor *owner = nullptr); + /** Default destructor */ + ~TensorAllocator(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + TensorAllocator(const TensorAllocator &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + TensorAllocator &operator=(const TensorAllocator &) = delete; + /** Allow instances of this class to be moved */ + TensorAllocator(TensorAllocator &&) noexcept; + /** Allow instances of this class to be moved */ + TensorAllocator &operator=(TensorAllocator &&) noexcept; /** Make ITensorAllocator's init methods available */ using ITensorAllocator::init; @@ -72,6 +86,11 @@ public: * */ void free() override; + /** Associates the tensor with a memory group + * + * @param[in] associated_memory_group Memory group to associate the tensor with + */ + void set_associated_memory_group(MemoryGroup *associated_memory_group); protected: /** No-op for CPU memory @@ -84,7 +103,9 @@ protected: void unlock() override; private: - std::shared_ptr> _buffer; /**< CPU memory allocation. */ + MemoryGroup *_associated_memory_group; /**< Registered memory manager */ + uint8_t *_buffer; /**< CPU memory allocation. */ + Tensor *_owner; /**< Owner of the allocator */ }; } #endif /* __ARM_COMPUTE_TENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/Types.h b/arm_compute/runtime/Types.h new file mode 100644 index 0000000000..9916e6d47b --- /dev/null +++ b/arm_compute/runtime/Types.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
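lock_pool() and unlock_pool() are the blocking hand-off between memory groups and pools: the semaphore counts free pools and the mutex guards the two lists. The implementation added in src/runtime/PoolManager.cpp is outside this excerpt; a sketch of the locking discipline, assuming the Semaphore added under support/ exposes the usual wait()/signal() pair, is:

#include "arm_compute/runtime/PoolManager.h"

#include "arm_compute/core/Error.h"

#include <algorithm>
#include <mutex>

using namespace arm_compute;

IMemoryPool *PoolManager::lock_pool()
{
    ARM_COMPUTE_ERROR_ON_MSG(_free_pools.empty() && _occupied_pools.empty(), "Haven't setup any pools!");

    _sem->wait(); // block until at least one pool is free
    std::lock_guard<arm_compute::Mutex> lock(_mtx);

    ARM_COMPUTE_ERROR_ON_MSG(_free_pools.empty(), "A free pool must exist once the semaphore is signalled");
    _occupied_pools.splice(std::begin(_occupied_pools), _free_pools, std::begin(_free_pools));
    return _occupied_pools.front().get();
}

void PoolManager::unlock_pool(IMemoryPool *pool)
{
    std::lock_guard<arm_compute::Mutex> lock(_mtx);

    auto it = std::find_if(std::begin(_occupied_pools), std::end(_occupied_pools),
                           [pool](const std::unique_ptr<IMemoryPool> &pool_it)
    {
        return pool_it.get() == pool;
    });
    ARM_COMPUTE_ERROR_ON_MSG(it == std::end(_occupied_pools), "Pool to be unlocked was not found");
    _free_pools.splice(std::begin(_free_pools), _occupied_pools, it);
    _sem->signal(); // advertise the freed pool
}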
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_RUNTIME_TYPES_H__
+#define __ARM_COMPUTE_RUNTIME_TYPES_H__
+
+#include <map>
+
+namespace arm_compute
+{
+/** Mapping type */
+enum class MappingType
+{
+    BLOBS,  /**< Mappings are in blob granularity */
+    OFFSETS /**< Mappings are in offset granularity in the same blob */
+};
+
+/** A map of (handle, index/offset), where handle is the memory handle of the object
+ * to provide the memory for and index/offset is the buffer/offset from the pool that should be used
+ *
+ * @note All objects are pre-pinned to specific buffers to avoid any relevant overheads
+ */
+using MemoryMappings = std::map<void **, size_t>;
+
+/** A map of the groups and memory mappings */
+using GroupMappings = std::map<size_t, MemoryMappings>;
+
+} // arm_compute
+#endif /* __ARM_COMPUTE_RUNTIME_TYPES_H__ */
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index d4419df38f..d763a07079 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -74,6 +74,7 @@ def filter_clang_tidy_lines( lines ):
                ("NEGEMMMatrixMultiplyKernel.cpp" in line and "do not use C-style cast to convert between unrelated types" in line) or
                ("NEPoolingLayerKernel.cpp" in line and "do not use C-style cast to convert between unrelated types" in line) or
                ("NESoftmaxLayerKernel.cpp" in line and "do not use C-style cast to convert between unrelated types" in line) or
+               ("parameter 'memory_manager' is unused" in line) or
                "3rdparty" in line):
                print_context=False
                continue
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 085e186543..0f44ad999f 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -95,6 +95,7 @@ bool CLSymbols::load(const std::string &library)
     clReleaseContext        = reinterpret_cast(dlsym(handle, "clReleaseContext"));
     clRetainCommandQueue    = reinterpret_cast(dlsym(handle, "clRetainCommandQueue"));
     clEnqueueUnmapMemObject = reinterpret_cast(dlsym(handle, "clEnqueueUnmapMemObject"));
+    clRetainMemObject       = reinterpret_cast(dlsym(handle, "clRetainMemObject"));
     clReleaseMemObject      = reinterpret_cast(dlsym(handle, "clReleaseMemObject"));
     clGetDeviceInfo         = reinterpret_cast(dlsym(handle, "clGetDeviceInfo"));
     clGetDeviceIDs          = reinterpret_cast(dlsym(handle, "clGetDeviceIDs"));
@@ -175,6 +176,20 @@ cl_int clSetKernelArg(
     }
 }
 
+cl_int clRetainMemObject(cl_mem memobj)
+{
+    arm_compute::CLSymbols::get().load_default();
+    auto func = arm_compute::CLSymbols::get().clRetainMemObject;
+    if(func != nullptr)
+ { + return func(memobj); + } + else + { + return CL_OUT_OF_RESOURCES; + } +} + cl_int clReleaseMemObject(cl_mem memobj) { arm_compute::CLSymbols::get().load_default(); diff --git a/src/runtime/Allocator.cpp b/src/runtime/Allocator.cpp new file mode 100644 index 0000000000..50b0f0e6bb --- /dev/null +++ b/src/runtime/Allocator.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/Allocator.h" + +#include "arm_compute/core/Error.h" + +#include + +using namespace arm_compute; + +void *Allocator::allocate(size_t size, size_t alignment) +{ + ARM_COMPUTE_UNUSED(alignment); + return ::operator new(size); +} + +void Allocator::free(void *ptr) +{ + ::operator delete(ptr); +} diff --git a/src/runtime/BlobLifetimeManager.cpp b/src/runtime/BlobLifetimeManager.cpp new file mode 100644 index 0000000000..c60d8c14ef --- /dev/null +++ b/src/runtime/BlobLifetimeManager.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
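For readers providing their own backing store: the Allocator above is the simplest possible backend, and the only contract a custom backend has to meet is the allocate(size, alignment)/free(ptr) pair that both Allocator and CLBufferAllocator implement in this patch. A hedged sketch follows (MallocAllocator is a hypothetical name, not part of this patch, and it assumes IAllocator declares exactly those two virtuals):

    // Hypothetical custom backend based on malloc/free; illustration only.
    #include "arm_compute/runtime/IAllocator.h"

    #include <cstddef>
    #include <cstdlib>

    class MallocAllocator final : public arm_compute::IAllocator
    {
    public:
        void *allocate(size_t size, size_t alignment) override
        {
            (void)alignment; // alignment accepted but ignored, mirroring the default Allocator
            return std::malloc(size);
        }
        void free(void *ptr) override
        {
            std::free(ptr);
        }
    };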
+ */ +#include "arm_compute/runtime/BlobLifetimeManager.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/BlobMemoryPool.h" +#include "arm_compute/runtime/IAllocator.h" +#include "arm_compute/runtime/IMemoryGroup.h" +#include "support/ToolchainSupport.h" + +#include +#include +#include +#include + +using namespace arm_compute; + +BlobLifetimeManager::BlobLifetimeManager() + : _active_group(nullptr), _active_elements(), _finalized_groups(), _blobs() +{ +} + +void BlobLifetimeManager::register_group(IMemoryGroup *group) +{ + if(_active_group == nullptr) + { + ARM_COMPUTE_ERROR_ON(group == nullptr); + _active_group = group; + } +} + +void BlobLifetimeManager::start_lifetime(void *obj) +{ + ARM_COMPUTE_ERROR_ON(obj == nullptr); + ARM_COMPUTE_ERROR_ON_MSG(std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e) + { + return obj == e.id; + }) != std::end(_active_elements), + "Memory object is already registered!"); + + // Insert object in groups and mark its finalized state to false + _active_elements.emplace_back(obj); +} + +void BlobLifetimeManager::end_lifetime(void *obj, void **handle, size_t size) +{ + ARM_COMPUTE_ERROR_ON(obj == nullptr); + + // Find object + auto it = std::find_if(std::begin(_active_elements), std::end(_active_elements), [&obj](const Element & e) + { + return obj == e.id; + }); + ARM_COMPUTE_ERROR_ON(it == std::end(_active_elements)); + + // Update object fields and mark object as complete + it->handle = handle; + it->size = size; + it->status = true; + + // Check if all object are finalized and reset active group + if(are_all_finalized()) + { + // Update finalized groups + _finalized_groups[_active_group].insert(std::end(_finalized_groups[_active_group]), std::begin(_active_elements), std::end(_active_elements)); + + // Update blobs and group mappings + update_blobs_and_mappings(); + + // Reset state + _active_elements.clear(); + _active_group = nullptr; + } +} + +std::unique_ptr BlobLifetimeManager::create_pool(IAllocator *allocator) +{ + ARM_COMPUTE_ERROR_ON(allocator == nullptr); + return support::cpp14::make_unique(allocator, _blobs); +} + +bool BlobLifetimeManager::are_all_finalized() const +{ + return !std::any_of(std::begin(_active_elements), std::end(_active_elements), [](const Element e) + { + return !e.status; + }); +} + +MappingType BlobLifetimeManager::mapping_type() const +{ + return MappingType::BLOBS; +} + +void BlobLifetimeManager::update_blobs_and_mappings() +{ + ARM_COMPUTE_ERROR_ON(!are_all_finalized()); + ARM_COMPUTE_ERROR_ON(_active_group == nullptr); + + // Sort finalized group requirements in descending order + auto group = _finalized_groups[_active_group]; + std::sort(std::begin(group), std::end(group), [](const Element & a, const Element & b) + { + return a.size > b.size; + }); + std::vector group_sizes; + std::transform(std::begin(group), std::end(group), std::back_inserter(group_sizes), [](const Element & e) + { + return e.size; + }); + + // Update blob sizes + size_t max_size = std::max(_blobs.size(), group_sizes.size()); + _blobs.resize(max_size, 0); + group_sizes.resize(max_size, 0); + std::transform(std::begin(_blobs), std::end(_blobs), std::begin(group_sizes), std::begin(_blobs), [](size_t lhs, size_t rhs) + { + return std::max(lhs, rhs); + }); + + // Calculate group mappings + auto &group_mappings = _active_group->mappings(); + int blob_idx = 0; + for(auto &e : group) + { + group_mappings[e.handle] = blob_idx++; + } +} diff --git a/src/runtime/BlobMemoryPool.cpp 
b/src/runtime/BlobMemoryPool.cpp new file mode 100644 index 0000000000..6571c75fe7 --- /dev/null +++ b/src/runtime/BlobMemoryPool.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/BlobMemoryPool.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/IMemoryPool.h" +#include "arm_compute/runtime/Types.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +BlobMemoryPool::BlobMemoryPool(IAllocator *allocator, std::vector blob_sizes) + : _allocator(allocator), _blobs(), _blob_sizes(std::move(blob_sizes)) +{ + ARM_COMPUTE_ERROR_ON(!allocator); + allocate_blobs(_blob_sizes); +} + +BlobMemoryPool::~BlobMemoryPool() +{ + ARM_COMPUTE_ERROR_ON(!_allocator); + free_blobs(); +} + +void BlobMemoryPool::acquire(MemoryMappings &handles) +{ + ARM_COMPUTE_ERROR_ON(handles.size() > _blobs.size()); + + // Set memory to handlers + for(auto &handle : handles) + { + ARM_COMPUTE_ERROR_ON(handle.first == nullptr); + *handle.first = _blobs[handle.second]; + } +} + +void BlobMemoryPool::release(MemoryMappings &handles) +{ + for(auto &handle : handles) + { + ARM_COMPUTE_ERROR_ON(handle.first == nullptr); + *handle.first = nullptr; + } +} + +MappingType BlobMemoryPool::mapping_type() const +{ + return MappingType::BLOBS; +} + +std::unique_ptr BlobMemoryPool::duplicate() +{ + ARM_COMPUTE_ERROR_ON(!_allocator); + return support::cpp14::make_unique(_allocator, _blob_sizes); +} + +void BlobMemoryPool::allocate_blobs(const std::vector &sizes) +{ + ARM_COMPUTE_ERROR_ON(!_allocator); + + for(const auto &size : sizes) + { + _blobs.push_back(_allocator->allocate(size, 0)); + } +} + +void BlobMemoryPool::free_blobs() +{ + ARM_COMPUTE_ERROR_ON(!_allocator); + + for(auto &blob : _blobs) + { + _allocator->free(blob); + } + _blobs.clear(); +} \ No newline at end of file diff --git a/src/runtime/CL/CLBufferAllocator.cpp b/src/runtime/CL/CLBufferAllocator.cpp new file mode 100644 index 0000000000..9a5c13ac5a --- /dev/null +++ b/src/runtime/CL/CLBufferAllocator.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 ARM Limited. 
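To make the blob contract concrete: acquire() walks a MemoryMappings table in which each key is the address of a buffer pointer (a void **) and each value is an index into the pool's blob vector, and writes the blob address through that key; release() nulls the pointers again. A minimal illustrative sketch, driving BlobMemoryPool directly instead of through a MemoryGroup, and assuming MemoryMappings is the std::map<void **, size_t> alias from arm_compute/runtime/Types.h:

    #include "arm_compute/runtime/Allocator.h"
    #include "arm_compute/runtime/BlobMemoryPool.h"
    #include "arm_compute/runtime/Types.h"

    void blob_pool_sketch()
    {
        arm_compute::Allocator      allocator;
        arm_compute::BlobMemoryPool pool(&allocator, { 1024, 512 }); // two blobs

        void                       *buffer0 = nullptr; // stand-in for a tensor's backing pointer
        arm_compute::MemoryMappings mappings;
        mappings[&buffer0] = 0;                        // pre-pin buffer0 to blob 0

        pool.acquire(mappings);  // buffer0 now points at the 1024-byte blob
        // ... run the workload that uses buffer0 ...
        pool.release(mappings);  // buffer0 is reset to nullptr
    }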
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/CLBufferAllocator.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" + +#include + +using namespace arm_compute; + +CLBufferAllocator::CLBufferAllocator(cl::Context context) + : _context(std::move(context)) +{ +} + +void *CLBufferAllocator::allocate(size_t size, size_t alignment) +{ + ARM_COMPUTE_UNUSED(alignment); + cl_mem buf = clCreateBuffer(_context.get(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size, nullptr, nullptr); + return static_cast(buf); +} + +void CLBufferAllocator::free(void *ptr) +{ + ARM_COMPUTE_ERROR_ON(ptr == nullptr); + clReleaseMemObject(static_cast(ptr)); +} diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp index eefa0331d5..bc513d139b 100644 --- a/src/runtime/CL/CLTensor.cpp +++ b/src/runtime/CL/CLTensor.cpp @@ -28,7 +28,7 @@ using namespace arm_compute; CLTensor::CLTensor() - : _allocator() + : _allocator(this) { } diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp index 8112a7148f..ad165fad7d 100644 --- a/src/runtime/CL/CLTensorAllocator.cpp +++ b/src/runtime/CL/CLTensorAllocator.cpp @@ -25,15 +25,21 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLScheduler.h" using namespace arm_compute; -CLTensorAllocator::CLTensorAllocator() - : _buffer(), _mapping(nullptr) +CLTensorAllocator::CLTensorAllocator(CLTensor *owner) + : _associated_memory_group(nullptr), _buffer(), _mapping(nullptr), _owner(owner) { } +CLTensorAllocator::~CLTensorAllocator() +{ + _buffer = cl::Buffer(); +} + uint8_t *CLTensorAllocator::data() { return _mapping; @@ -47,17 +53,32 @@ const cl::Buffer &CLTensorAllocator::cl_data() const void CLTensorAllocator::allocate() { ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr); - - _buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info().total_size()); + if(_associated_memory_group == nullptr) + { + _buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info().total_size()); + } + else + { + _associated_memory_group->finalize_memory(_owner, reinterpret_cast(&_buffer()), info().total_size()); + } info().set_is_resizable(false); } void CLTensorAllocator::free() { - ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr); + 
if(_associated_memory_group == nullptr) + { + _buffer = cl::Buffer(); + info().set_is_resizable(true); + } +} - _buffer = cl::Buffer(); - info().set_is_resizable(true); +void CLTensorAllocator::set_associated_memory_group(CLMemoryGroup *associated_memory_group) +{ + ARM_COMPUTE_ERROR_ON(associated_memory_group == nullptr); + ARM_COMPUTE_ERROR_ON(_associated_memory_group != nullptr); + ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr); + _associated_memory_group = associated_memory_group; } uint8_t *CLTensorAllocator::lock() diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 0bbec94e78..4b1bfd8b8f 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -30,12 +30,13 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include +#include #include using namespace arm_compute; -CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights() - : _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false) +CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false) { } @@ -68,6 +69,7 @@ void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const TensorInfo info_wr(shape_wr, 1, dt, fixed_point_position); _weights_reshaped.allocator()->init(info_wr); + _memory_group.manage(&_weights_reshaped); _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); _weights_transposed_kernel.configure(&_weights_reshaped, output); _weights_reshaped.allocator()->allocate(); @@ -80,17 +82,21 @@ void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const void CLConvolutionLayerReshapeWeights::run() { + _memory_group.acquire(); + cl::CommandQueue q = CLScheduler::get().queue(); CLScheduler::get().enqueue(_weights_reshape_kernel); if(_transpose1xW) { CLScheduler::get().enqueue(_weights_transposed_kernel); } + + _memory_group.release(); } -CLConvolutionLayer::CLConvolutionLayer() - : _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), - _weights_transposed(), _gemm_output(), _has_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) +CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), + _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _has_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) { } @@ -179,6 +185,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig shape_im2col.set(1, mat_input_rows); shape_im2col.set(2, 1); _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); + _memory_group.manage(&_input_im2col_reshaped); // Create tensor (interleave) to prepare input tensor for GEMM if(!_is_fully_connected_convolution) @@ -187,6 +194,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig shape_interleaved.set(0, shape_interleaved.x() * 4); shape_interleaved.set(1, 
std::ceil(shape_interleaved.y() / 4.f)); _input_interleaved_reshaped.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); + _memory_group.manage(&_input_interleaved_reshaped); } // Create GEMM output tensor @@ -194,6 +202,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig shape_gemm.set(0, mat_weights_cols); shape_gemm.set(1, mat_input_rows); _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, dt, fixed_point_position)); + _memory_group.manage(&_gemm_output); // Configure kernels _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); @@ -208,8 +217,11 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig { _input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped); _mm_kernel.configure(&_input_interleaved_reshaped, weights, &_gemm_output, 1.0f); + _input_interleaved_reshaped.allocator()->allocate(); } + _input_im2col_reshaped.allocator()->allocate(); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); + _gemm_output.allocator()->allocate(); ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); @@ -218,12 +230,6 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig { _weights_reshaped.allocator()->allocate(); } - _input_im2col_reshaped.allocator()->allocate(); - if(!_is_fully_connected_convolution) - { - _input_interleaved_reshaped.allocator()->allocate(); - } - _gemm_output.allocator()->allocate(); } void CLConvolutionLayer::run() @@ -235,6 +241,8 @@ void CLConvolutionLayer::run() _reshape_weights.run(); } + _memory_group.acquire(); + // Run input reshaping CLScheduler::get().enqueue(_input_im2col_kernel); if(!_is_fully_connected_convolution) @@ -247,4 +255,6 @@ void CLConvolutionLayer::run() // Reshape output matrix CLScheduler::get().enqueue(_output_col2im_kernel, false); + + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index f7cea551f6..ee1558fe71 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -39,9 +39,9 @@ void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLT _kernel = std::move(k); } -CLFullyConnectedLayer::CLFullyConnectedLayer() - : _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), - _accumulate_biases(false) +CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), + _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false) { } @@ -63,6 +63,7 @@ void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLT _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); // Configure im2col kernel + _memory_group.manage(&_im2col_output); _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure matrix multiply kernel @@ -158,6 +159,8 @@ void CLFullyConnectedLayer::run() 
_reshape_weights_kernel.run(); } + _memory_group.acquire(); + // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { @@ -172,4 +175,6 @@ void CLFullyConnectedLayer::run() { CLScheduler::get().enqueue(_accumulate_biases_kernel); } + + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp index 850eb2c6f8..7505a2c974 100644 --- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -25,12 +25,13 @@ #include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLScheduler.h" using namespace arm_compute; -CLSoftmaxLayer::CLSoftmaxLayer() - : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() +CLSoftmaxLayer::CLSoftmaxLayer(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() { } @@ -47,6 +48,11 @@ void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output) _max.allocator()->init(tensor_info_max_sum); _sum.allocator()->init(tensor_info_max_sum); + // Manage intermediate buffers + _memory_group.manage(&_tmp); + _memory_group.manage(&_max); + _memory_group.manage(&_sum); + // Configure Kernels _max_kernel.configure(input, &_max); _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); @@ -60,7 +66,11 @@ void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output) void CLSoftmaxLayer::run() { + _memory_group.acquire(); + CLScheduler::get().enqueue(_max_kernel, false); CLScheduler::get().enqueue(_shift_exp_sum_kernel, false); CLScheduler::get().enqueue(_norm_kernel); + + _memory_group.release(); } diff --git a/src/runtime/MemoryManagerOnDemand.cpp b/src/runtime/MemoryManagerOnDemand.cpp new file mode 100644 index 0000000000..4dfa28bddb --- /dev/null +++ b/src/runtime/MemoryManagerOnDemand.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
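The function-side pattern visible in CLSoftmaxLayer and CLFullyConnectedLayer above is always the same: build the MemoryGroup from the (possibly null) memory manager, call manage() on each intermediate tensor before the kernel that produces it is configured, call allocate() once its last consumer has been configured, and bracket run() with acquire()/release(). A hedged skeleton of a hypothetical function following that lifecycle (ExampleFunction and _tmp are illustrative, not from this patch):

    #include "arm_compute/runtime/IMemoryManager.h"
    #include "arm_compute/runtime/MemoryGroup.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <memory>

    class ExampleFunction
    {
    public:
        ExampleFunction(std::shared_ptr<arm_compute::IMemoryManager> memory_manager = nullptr)
            : _memory_group(std::move(memory_manager)), _tmp()
        {
        }
        void configure(/* inputs, outputs */)
        {
            // _tmp.allocator()->init(...);  // shape/type of the intermediate tensor
            _memory_group.manage(&_tmp);     // lifetime starts before the producer kernel is configured
            // ... configure the kernels that write to and read from _tmp ...
            _tmp.allocator()->allocate();    // lifetime ends after the last consumer is configured
        }
        void run()
        {
            _memory_group.acquire();         // map pooled memory into _tmp
            // ... schedule the kernels ...
            _memory_group.release();         // hand the pool back for reuse
        }

    private:
        arm_compute::MemoryGroup _memory_group;
        arm_compute::Tensor      _tmp;
    };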
+ */ +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/ILifetimeManager.h" +#include "arm_compute/runtime/IPoolManager.h" + +#include + +using namespace arm_compute; + +MemoryManagerOnDemand::MemoryManagerOnDemand(std::shared_ptr lifetime_manager, std::shared_ptr pool_manager) + : _lifetime_mgr(std::move(lifetime_manager)), _pool_mgr(std::move(pool_manager)), _allocator(nullptr), _is_finalized(false), _num_pools(1) +{ + ARM_COMPUTE_ERROR_ON_MSG(!_lifetime_mgr, "Lifetime manager not specified correctly!"); + ARM_COMPUTE_ERROR_ON_MSG(!_pool_mgr, "Pool manager not specified correctly!"); +} + +bool MemoryManagerOnDemand::is_finalized() const +{ + return _is_finalized; +} + +void MemoryManagerOnDemand::set_num_pools(unsigned int num_pools) +{ + ARM_COMPUTE_ERROR_ON(num_pools == 0); + _num_pools = num_pools; +} + +void MemoryManagerOnDemand::set_allocator(IAllocator *allocator) +{ + ARM_COMPUTE_ERROR_ON_MSG(is_finalized(), "Memory manager is already finalized!"); + ARM_COMPUTE_ERROR_ON(allocator == nullptr); + _allocator = allocator; +} + +ILifetimeManager *MemoryManagerOnDemand::lifetime_manager() +{ + return _lifetime_mgr.get(); +} + +IPoolManager *MemoryManagerOnDemand::pool_manager() +{ + return _pool_mgr.get(); +} + +void MemoryManagerOnDemand::finalize() +{ + ARM_COMPUTE_ERROR_ON_MSG(is_finalized(), "Memory manager is already finalized!"); + ARM_COMPUTE_ERROR_ON(!_lifetime_mgr); + ARM_COMPUTE_ERROR_ON(!_pool_mgr); + ARM_COMPUTE_ERROR_ON_MSG(!_lifetime_mgr->are_all_finalized(), "All the objects have not been finalized! "); + ARM_COMPUTE_ERROR_ON(_allocator == nullptr); + + // Create pools + auto pool_template = _lifetime_mgr->create_pool(_allocator); + for(int i = _num_pools; i > 1; --i) + { + auto pool = pool_template->duplicate(); + _pool_mgr->register_pool(std::move(pool)); + } + _pool_mgr->register_pool(std::move(pool_template)); + + // Set finalized to true + _is_finalized = true; +} diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index 1c87f60a29..0466a4a501 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -34,8 +34,8 @@ using namespace arm_compute; -NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() - : _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false) +NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false) { } @@ -68,6 +68,7 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I TensorInfo info_wr(shape_wr, 1, weights->info()->data_type(), weights->info()->fixed_point_position()); _weights_reshaped.allocator()->init(info_wr); + _memory_group.manage(&_weights_reshaped); _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); _weights_transposed_kernel.configure(&_weights_reshaped, output); _weights_reshaped.allocator()->allocate(); @@ -80,16 +81,20 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I void NEConvolutionLayerReshapeWeights::run() { + _memory_group.acquire(); + NEScheduler::get().schedule(&_weights_reshape_kernel, 3); if(_transpose1xW) { NEScheduler::get().schedule(&_weights_transposed_kernel, Window::DimY); } + + 
_memory_group.release(); } -NEConvolutionLayer::NEConvolutionLayer() - : _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), - _gemm_output(), _has_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) +NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), + _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _has_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) { } @@ -175,6 +180,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, shape_im2col.set(1, mat_input_rows); shape_im2col.set(2, 1); _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); + _memory_group.manage(&_input_im2col_reshaped); // Create tensor (interleave) to prepare input tensor for GEMM if(!_is_fully_connected_convolution) @@ -183,6 +189,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, shape_interleaved.set(0, shape_interleaved.x() * 4); shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f)); _input_interleaved_reshaped.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); + _memory_group.manage(&_input_interleaved_reshaped); } // Create GEMM output tensor @@ -190,6 +197,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, shape_gemm.set(0, mat_weights_cols); shape_gemm.set(1, mat_input_rows); _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, dt, fixed_point_position)); + _memory_group.manage(&_gemm_output); // Configure kernels _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); @@ -201,8 +209,11 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, { _input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped); _mm_kernel.configure(&_input_interleaved_reshaped, weights, &_gemm_output, 1.0f); + _input_interleaved_reshaped.allocator()->allocate(); } + _input_im2col_reshaped.allocator()->allocate(); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); + _gemm_output.allocator()->allocate(); ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); @@ -211,12 +222,6 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, { _weights_reshaped.allocator()->allocate(); } - _input_im2col_reshaped.allocator()->allocate(); - if(!_is_fully_connected_convolution) - { - _input_interleaved_reshaped.allocator()->allocate(); - } - _gemm_output.allocator()->allocate(); } void NEConvolutionLayer::run() @@ -228,6 +233,8 @@ void NEConvolutionLayer::run() _reshape_weights.run(); } + _memory_group.acquire(); + // Run input reshaping NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY); if(!_is_fully_connected_convolution) @@ -241,4 +248,6 @@ void NEConvolutionLayer::run() // Reshape output matrix NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY); + + _memory_group.release(); } diff --git 
a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 39983bf643..2e8d10598d 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -32,8 +32,8 @@ namespace arm_compute { -NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights() - : _transpose_kernel(), _transpose1xW_kernel(), _transpose_output(), _transpose_weights(false), _is_batched_fc_layer(false) +NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _transpose_kernel(), _transpose1xW_kernel(), _transpose_output(), _transpose_weights(false), _is_batched_fc_layer(false) { } @@ -58,6 +58,7 @@ void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITenso // Initialize the output tensor for transpose TensorShape shape_transposed(input->info()->dimension(1), input->info()->dimension(0)); _transpose_output.allocator()->init(TensorInfo(shape_transposed, 1, data_type, fixed_point_position)); + _memory_group.manage(&_transpose_output); _transpose_kernel.configure(input, &_transpose_output); // Configure transpose 1xW kernel @@ -87,6 +88,8 @@ void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITenso void NEFullyConnectedLayerReshapeWeights::run() { + _memory_group.acquire(); + if(_transpose_weights) { NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); @@ -96,11 +99,13 @@ void NEFullyConnectedLayerReshapeWeights::run() { NEScheduler::get().schedule(&_transpose1xW_kernel, Window::DimY); } + + _memory_group.release(); } -NEFullyConnectedLayer::NEFullyConnectedLayer() - : _im2col_kernel(), _reshape_weights_kernel(), _interleave4x4_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _interleave4x4_output(), _reshape_weights_output(), - _are_weights_reshaped(false), _is_batched_fc_layer(false), _linearize_input(false), _accumulate_biases(false) +NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _interleave4x4_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _interleave4x4_output(), + _reshape_weights_output(), _are_weights_reshaped(false), _is_batched_fc_layer(false), _linearize_input(false), _accumulate_biases(false) { } @@ -191,6 +196,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, data_type, fixed_point_position)); // Configure im2col kernel + _memory_group.manage(&_im2col_output); _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); multiply_input = &_im2col_output; @@ -204,6 +210,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, data_type, fixed_point_position)); // Configure interleave4x4 kernel + _memory_group.manage(&_interleave4x4_output); _interleave4x4_kernel.configure(multiply_input, &_interleave4x4_output); multiply_input = &_interleave4x4_output; @@ -248,6 +255,8 @@ void NEFullyConnectedLayer::run() _reshape_weights_kernel.run(); } + _memory_group.acquire(); + // Linearize input if it comes from a convolutional layer if(_linearize_input) { @@ -268,5 +277,7 @@ void NEFullyConnectedLayer::run() { 
NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY); } + + _memory_group.release(); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index 13dfa4a51e..cc5d4e91c3 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -31,8 +31,8 @@ using namespace arm_compute; -NESoftmaxLayer::NESoftmaxLayer() - : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _fill_border_kernel(), _max(), _sum(), _tmp() +NESoftmaxLayer::NESoftmaxLayer(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _fill_border_kernel(), _max(), _sum(), _tmp() { } @@ -50,6 +50,11 @@ void NESoftmaxLayer::configure(ITensor *input, ITensor *output) _max.allocator()->init(tensor_info_max_sum); _sum.allocator()->init(tensor_info_max_sum); + // Manage intermediate buffers + _memory_group.manage(&_tmp); + _memory_group.manage(&_max); + _memory_group.manage(&_sum); + // Configure Kernels _max_kernel.configure(input, &_max); _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); @@ -64,8 +69,12 @@ void NESoftmaxLayer::configure(ITensor *input, ITensor *output) void NESoftmaxLayer::run() { + _memory_group.acquire(); + NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); NEScheduler::get().schedule(&_max_kernel, Window::DimY); NEScheduler::get().schedule(&_shift_exp_sum_kernel, Window::DimY); NEScheduler::get().schedule(&_norm_kernel, Window::DimY); + + _memory_group.release(); } diff --git a/src/runtime/PoolManager.cpp b/src/runtime/PoolManager.cpp new file mode 100644 index 0000000000..42cc943e56 --- /dev/null +++ b/src/runtime/PoolManager.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
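Putting the pieces together at application level: a lifetime manager, a pool manager and an allocator are wired into a MemoryManagerOnDemand, the functions are configured, and finalize() is called before the first run(). A hedged end-to-end sketch (the tensor arguments are assumed to be initialised and allocated by the caller):

    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/runtime/Allocator.h"
    #include "arm_compute/runtime/BlobLifetimeManager.h"
    #include "arm_compute/runtime/MemoryManagerOnDemand.h"
    #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
    #include "arm_compute/runtime/PoolManager.h"

    #include <memory>

    void memory_manager_sketch(arm_compute::ITensor *in0, arm_compute::ITensor *out0,
                               arm_compute::ITensor *in1, arm_compute::ITensor *out1)
    {
        auto lifetime_mgr = std::make_shared<arm_compute::BlobLifetimeManager>();
        auto pool_mgr     = std::make_shared<arm_compute::PoolManager>();
        auto mm           = std::make_shared<arm_compute::MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

        // Functions sharing the same manager reuse the same pool for their intermediate tensors.
        arm_compute::NESoftmaxLayer softmax0(mm);
        arm_compute::NESoftmaxLayer softmax1(mm);
        softmax0.configure(in0, out0);
        softmax1.configure(in1, out1);

        arm_compute::Allocator allocator;
        mm->set_allocator(&allocator);
        mm->set_num_pools(1);   // one pool is enough when the functions run sequentially
        mm->finalize();         // creates and registers the pools; must precede run()

        softmax0.run();
        softmax1.run();
    }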
+ */ +#include "arm_compute/runtime/PoolManager.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/IMemoryPool.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +PoolManager::PoolManager() + : _free_pools(), _occupied_pools(), _sem(), _mtx() +{ +} + +IMemoryPool *PoolManager::lock_pool() +{ + ARM_COMPUTE_ERROR_ON_MSG(_free_pools.empty() && _occupied_pools.empty(), "Haven't setup any pools!"); + + _sem->wait(); + std::lock_guard lock(_mtx); + ARM_COMPUTE_ERROR_ON_MSG(_free_pools.empty(), "Empty pool must exist as semaphore has been signalled"); + _occupied_pools.splice(std::begin(_occupied_pools), _free_pools, std::begin(_free_pools)); + return _occupied_pools.front().get(); +} + +void PoolManager::unlock_pool(IMemoryPool *pool) +{ + ARM_COMPUTE_ERROR_ON_MSG(_free_pools.empty() && _occupied_pools.empty(), "Haven't setup any pools!"); + + std::lock_guard lock(_mtx); + auto it = std::find_if(std::begin(_occupied_pools), std::end(_occupied_pools), [pool](const std::unique_ptr &pool_it) + { + return pool_it.get() == pool; + }); + ARM_COMPUTE_ERROR_ON_MSG(it == std::end(_occupied_pools), "Pool to be unlocked couldn't be found!"); + _free_pools.splice(std::begin(_free_pools), _occupied_pools, it); + _sem->signal(); +} + +void PoolManager::register_pool(std::unique_ptr pool) +{ + std::lock_guard lock(_mtx); + ARM_COMPUTE_ERROR_ON_MSG(!_occupied_pools.empty(), "All pools should be free in order to register a new one!"); + + // Set pool + _free_pools.push_front(std::move(pool)); + + // Update semaphore + _sem = arm_compute::support::cpp14::make_unique(_free_pools.size()); +} diff --git a/src/runtime/Tensor.cpp b/src/runtime/Tensor.cpp index 435068c61d..a76c37e3d0 100644 --- a/src/runtime/Tensor.cpp +++ b/src/runtime/Tensor.cpp @@ -26,7 +26,7 @@ using namespace arm_compute; Tensor::Tensor() - : _allocator() + : _allocator(this) { } diff --git a/src/runtime/TensorAllocator.cpp b/src/runtime/TensorAllocator.cpp index 5c719c761a..272b9f5695 100644 --- a/src/runtime/TensorAllocator.cpp +++ b/src/runtime/TensorAllocator.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/MemoryGroup.h" #include @@ -63,11 +64,50 @@ bool validate_subtensor_shape(const TensorInfo &parent_info, const TensorInfo &c } } // namespace -TensorAllocator::TensorAllocator() - : _buffer(nullptr) +TensorAllocator::TensorAllocator(Tensor *owner) + : _associated_memory_group(nullptr), _buffer(nullptr), _owner(owner) { } +TensorAllocator::~TensorAllocator() +{ + if((_associated_memory_group == nullptr) && (_buffer != nullptr)) + { + delete[] _buffer; + _buffer = nullptr; + info().set_is_resizable(true); + } +} + +TensorAllocator::TensorAllocator(TensorAllocator &&o) noexcept + : ITensorAllocator(std::move(o)), + _associated_memory_group(o._associated_memory_group), + _buffer(o._buffer), + _owner(o._owner) +{ + o._associated_memory_group = nullptr; + o._buffer = nullptr; + o._owner = nullptr; +} + +TensorAllocator &TensorAllocator::operator=(TensorAllocator &&o) noexcept +{ + if(&o != this) + { + _associated_memory_group = o._associated_memory_group; + o._associated_memory_group = nullptr; + + _buffer = o._buffer; + o._buffer = nullptr; + + _owner = o._owner; + o._owner = nullptr; + + ITensorAllocator::operator=(std::move(o)); + } + return *this; +} + void TensorAllocator::init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info) { // 
Get parent info @@ -90,28 +130,44 @@ void TensorAllocator::init(const TensorAllocator &allocator, const Coordinates & uint8_t *TensorAllocator::data() const { - return (_buffer != nullptr) ? _buffer.get()->data() : nullptr; + return _buffer; } void TensorAllocator::allocate() { ARM_COMPUTE_ERROR_ON(_buffer != nullptr); - - _buffer = std::make_shared>(info().total_size()); + if(_associated_memory_group == nullptr) + { + _buffer = new uint8_t[info().total_size()](); + } + else + { + _associated_memory_group->finalize_memory(_owner, reinterpret_cast(&_buffer), info().total_size()); + } info().set_is_resizable(false); } void TensorAllocator::free() { - ARM_COMPUTE_ERROR_ON(_buffer == nullptr); + if((_associated_memory_group == nullptr) && (_buffer != nullptr)) + { + delete[] _buffer; + _buffer = nullptr; + info().set_is_resizable(true); + } +} - _buffer.reset(); - info().set_is_resizable(true); +void TensorAllocator::set_associated_memory_group(MemoryGroup *associated_memory_group) +{ + ARM_COMPUTE_ERROR_ON(associated_memory_group == nullptr); + ARM_COMPUTE_ERROR_ON(_associated_memory_group != nullptr); + ARM_COMPUTE_ERROR_ON(_buffer != nullptr); + _associated_memory_group = associated_memory_group; } uint8_t *TensorAllocator::lock() { - return (_buffer != nullptr) ? _buffer.get()->data() : nullptr; + return _buffer; } void TensorAllocator::unlock() diff --git a/support/Semaphore.h b/support/Semaphore.h new file mode 100644 index 0000000000..6cdf196dee --- /dev/null +++ b/support/Semaphore.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
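The TensorAllocator changes above give allocate() two paths: an unmanaged tensor immediately gets a new[] buffer, while a managed tensor only hands the address of its buffer pointer to the memory group through finalize_memory() and is backed lazily when the group is acquired. A brief hedged fragment illustrating the unmanaged path (shape and data type are arbitrary):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"

    void tensor_allocator_sketch()
    {
        arm_compute::Tensor tensor;
        tensor.allocator()->init(arm_compute::TensorInfo(arm_compute::TensorShape(16U, 16U), 1, arm_compute::DataType::F32));
        tensor.allocator()->allocate(); // unmanaged: backed by new uint8_t[total_size]() right away

        // Managed alternative (sketch): call group.manage(&tensor) before configuring its consumer;
        // allocate() then only registers the buffer slot, and memory appears at group.acquire().
    }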
+ */
+#ifndef __ARM_COMPUTE_UTILS_SEMAMPHORE_H__
+#define __ARM_COMPUTE_UTILS_SEMAMPHORE_H__
+
+#include "Mutex.h"
+#include "support/Mutex.h"
+#include <condition_variable>
+
+namespace arm_compute
+{
+#ifndef NO_MULTI_THREADING
+/** Semaphore class */
+class Semaphore
+{
+public:
+    /** Default Constructor
+     *
+     * @param[in] value Semaphore initial value
+     */
+    Semaphore(int value = 0)
+        : _value(value), _m(), _cv()
+    {
+    }
+    /** Signals a semaphore */
+    inline void signal()
+    {
+        std::unique_lock<arm_compute::Mutex> lock(_m);
+        ++_value;
+        _cv.notify_one();
+    }
+    /** Waits on a semaphore */
+    inline void wait()
+    {
+        std::unique_lock<arm_compute::Mutex> lock(_m);
+        _cv.wait(lock, [this]()
+        {
+            return _value > 0;
+        });
+        --_value;
+    }
+
+private:
+    int                     _value;
+    arm_compute::Mutex      _m;
+    std::condition_variable _cv;
+};
+#else  /* NO_MULTI_THREADING */
+/** Empty semaphore class */
+class Semaphore
+{
+public:
+    Semaphore(int value = 0)
+        : _value(value)
+    {
+    }
+    /** Signals a semaphore */
+    inline void signal()
+    {
+    }
+    /** Waits on a semaphore */
+    inline void wait()
+    {
+    }
+
+private:
+    int _value;
+};
+#endif /* NO_MULTI_THREADING */
+} // arm_compute
+#endif /* __ARM_COMPUTE_UTILS_SEMAMPHORE_H__ */
-- 
cgit v1.2.1
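Finally, the counting semaphore above is what lets PoolManager::lock_pool() block callers until unlock_pool() or register_pool() makes a pool available. A minimal hedged sketch of that wait()/signal() pairing outside the pool manager (the worker thread is purely illustrative):

    #include "support/Semaphore.h"

    #include <thread>

    void semaphore_sketch()
    {
        arm_compute::Semaphore sem(0); // nothing available yet

        std::thread worker([&sem]()
        {
            sem.wait();                // blocks until a unit is signalled, like lock_pool()
            // ... the worker now owns the shared resource ...
        });

        sem.signal();                  // make one unit available, like unlock_pool()
        worker.join();
    }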