From 0c19cbd5800e830fa67cdd3b725efe796b211899 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Tue, 11 May 2021 17:41:32 +0100
Subject: Move memory management out of CpuPooling

Change-Id: Idae4fc687942f61a1f63f23c9e5538df28888d93
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5632
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
---
 src/runtime/NEON/functions/NEPoolingLayer.cpp    | 19 ++++-
 src/runtime/cpu/operators/CpuPooling.cpp         | 70 +++++++++++-----
 src/runtime/cpu/operators/CpuPooling.h           | 23 +++--
 .../cpu/operators/CpuPoolingAssemblyDispatch.cpp | 98 ----------------------
 .../cpu/operators/CpuPoolingAssemblyDispatch.h   | 89 --------------------
 5 files changed, 73 insertions(+), 226 deletions(-)
 delete mode 100644 src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.cpp
 delete mode 100644 src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.h
(limited to 'src/runtime')

diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index dd7a3a337e..1570cdeedc 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -23,7 +23,9 @@
  */
 #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
 
+#include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/Tensor.h"
 #include "src/runtime/cpu/operators/CpuPooling.h"
 
 namespace arm_compute
@@ -33,16 +35,15 @@ struct NEPoolingLayer::Impl
     ITensor                          *src{ nullptr };
     ITensor                          *dst{ nullptr };
     ITensor                          *indices{ nullptr };
-    std::shared_ptr<IMemoryManager>   memory_manager{ nullptr };
+    Tensor                            workspace{ nullptr };
     std::unique_ptr<cpu::CpuPooling>  op{ nullptr };
 };
 
 NEPoolingLayer::~NEPoolingLayer() = default;
 
 NEPoolingLayer::NEPoolingLayer(std::shared_ptr<IMemoryManager> memory_manager)
-    : _impl(std::make_unique<Impl>())
+    : _memory_group(memory_manager), _impl(std::make_unique<Impl>())
 {
-    _impl->memory_manager = std::move(memory_manager);
 }
 
 void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices)
@@ -50,8 +51,17 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
     _impl->src     = input;
     _impl->dst     = output;
     _impl->indices = indices;
-    _impl->op      = std::make_unique<cpu::CpuPooling>(_impl->memory_manager);
+    _impl->op      = std::make_unique<cpu::CpuPooling>();
     _impl->op->configure(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr);
+
+    // Allocate workspace based on kernel's memory requirements
+    const experimental::MemoryRequirements mem_req = _impl->op->workspace();
+    if(!mem_req.empty())
+    {
+        _impl->workspace.allocator()->init(TensorInfo(TensorShape{ (mem_req[0].size + mem_req[0].alignment) }, 1, DataType::S8), mem_req[0].alignment);
+        _memory_group.manage(&_impl->workspace);
+        _impl->workspace.allocator()->allocate();
+    }
 }
 
 Status NEPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
@@ -65,6 +75,7 @@ void NEPoolingLayer::run()
     pack.add_tensor(TensorType::ACL_SRC, _impl->src);
     pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
     pack.add_tensor(TensorType::ACL_DST_1, _impl->indices);
+    pack.add_tensor(TensorType::ACL_INT_0, &_impl->workspace);
     _impl->op->run(pack);
 }
 } // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuPooling.cpp b/src/runtime/cpu/operators/CpuPooling.cpp
index 0b9b38d079..3a6ac24a74 100644
--- a/src/runtime/cpu/operators/CpuPooling.cpp
+++ b/src/runtime/cpu/operators/CpuPooling.cpp
@@ -27,41 +27,59 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h"
 #include "src/core/cpu/kernels/CpuPoolingKernel.h"
 
 namespace arm_compute
 {
 namespace cpu
 {
-CpuPooling::CpuPooling(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_manager(std::move(memory_manager)), _pooling_layer_kernel(), _border_handler(), _asm_glue(), _is_global_pooling_layer(false), _data_layout(DataLayout::NCHW)
+CpuPooling::CpuPooling()
+    : _pooling_layer_kernel(),
+      _border_handler(),
+      _asm_glue(),
+      _is_global_pooling_layer(false),
+      _data_layout(DataLayout::NCHW),
+      _mem_req()
 {
 }
 
 CpuPooling::~CpuPooling() = default;
 
-void CpuPooling::configure(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, ITensorInfo *indices)
+void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices)
 {
     // Check if we can run assembly kernels. Currently, indices are not supported by those kernels
-    const bool run_optimised = bool(CpuPoolingAssemblyDispatch::validate(input, output, pool_info)) && (indices == nullptr);
+    const bool run_optimised = bool(kernels::CpuPoolingAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr);
+
+    // Get data layout
+    _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout;
+
+    // Check if we have Global Pooling Layer
+    const unsigned int idx_width  = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+    const unsigned int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+    _is_global_pooling_layer = (src->dimension(idx_width) == pool_info.pool_size.width) && (src->dimension(idx_height) == pool_info.pool_size.height);
 
     if(run_optimised)
     {
-        _asm_glue = std::make_unique<CpuPoolingAssemblyDispatch>(_memory_manager);
-        _asm_glue->configure(input, output, pool_info);
-        ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured());
+        const CPUInfo     &ci          = NEScheduler::get().cpu_info();
+        const unsigned int num_threads = NEScheduler::get().num_threads();
+
+        auto pooling_wrapper = std::make_unique<kernels::CpuPoolingAssemblyWrapperKernel>();
+        ARM_COMPUTE_ERROR_ON(pooling_wrapper == nullptr);
+        pooling_wrapper->configure(src, dst, pool_info, ci);
+
+        // Get kernel's memory requirements
+        constexpr size_t alignment      = 4096;
+        const size_t     workspace_size = pooling_wrapper->get_working_size(num_threads);
+        _mem_req.push_back({ TensorType::ACL_INT_0, workspace_size, alignment });
+
+        _asm_glue = std::move(pooling_wrapper);
     }
     else
    {
-        // Check if we have Global Pooling Layer
-        _is_global_pooling_layer = (input->dimension(0) == pool_info.pool_size.width) && (input->dimension(1) == pool_info.pool_size.height);
-
-        // Get data layout
-        _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->data_layout() : pool_info.data_layout;
-
         // Configure pooling kernel
         auto k = std::make_unique<kernels::CpuPoolingKernel>();
-        k->configure(input, output, pool_info, indices);
+        k->configure(src, dst, pool_info, indices);
         _pooling_layer_kernel = std::move(k);
 
         switch(_data_layout)
@@ -71,12 +89,12 @@ void CpuPooling::configure(ITensorInfo *input, ITensorInfo *output, const Poolin
             // Configure border depending on operation required (quantize border in case of asymmetric data_type)
             BorderMode border_mode = (!indices && pool_info.pool_type == PoolingType::MAX) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
             PixelValue zero_value((indices) ? std::numeric_limits<int>::min() : 0.f);
-            if(is_data_type_quantized_asymmetric(input->data_type()) && !pool_info.exclude_padding)
+            if(is_data_type_quantized_asymmetric(src->data_type()) && !pool_info.exclude_padding)
             {
-                zero_value = PixelValue(0, input->data_type(), input->quantization_info());
+                zero_value = PixelValue(0, src->data_type(), src->quantization_info());
             }
             auto b = std::make_unique<NEFillBorderKernel>();
-            b->configure(input, _pooling_layer_kernel->border_size(), border_mode, zero_value);
+            b->configure(src, _pooling_layer_kernel->border_size(), border_mode, zero_value);
             _border_handler = std::move(b);
             break;
         }
@@ -88,23 +106,26 @@ void CpuPooling::configure(ITensorInfo *input, ITensorInfo *output, const Poolin
     }
 }
 
-Status CpuPooling::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
+Status CpuPooling::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
 {
-    const bool run_optimised = bool(CpuPoolingAssemblyDispatch::validate(input, output, pool_info)) && (indices == nullptr);
+    const bool run_optimised = bool(kernels::CpuPoolingAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr);
 
     if(run_optimised)
     {
         return Status{};
     }
 
-    return kernels::CpuPoolingKernel::validate(input, output, pool_info, indices);
+    return kernels::CpuPoolingKernel::validate(src, dst, pool_info, indices);
 }
 
 void CpuPooling::run(ITensorPack &tensors)
 {
-    if(_asm_glue && _asm_glue->is_configured())
+    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No tensors provided");
+
+    if(_asm_glue)
     {
-        _asm_glue->run(tensors);
+        const auto hints = (_is_global_pooling_layer) ? Window::DimX : Window::DimY;
+        NEScheduler::get().schedule_op(_asm_glue.get(), hints, _asm_glue->window(), tensors);
     }
     else
     {
@@ -126,5 +147,10 @@ void CpuPooling::run(ITensorPack &tensors)
         }
     }
 }
+
+experimental::MemoryRequirements CpuPooling::workspace() const
+{
+    return _mem_req;
+}
 } // namespace cpu
 } // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuPooling.h b/src/runtime/cpu/operators/CpuPooling.h
index b1647ea689..bc30adf762 100644
--- a/src/runtime/cpu/operators/CpuPooling.h
+++ b/src/runtime/cpu/operators/CpuPooling.h
@@ -26,8 +26,7 @@
 
 #include "src/runtime/cpu/ICpuOperator.h"
 
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.h"
+#include "arm_compute/core/experimental/Types.h"
 
 #include <memory>
 
@@ -38,19 +37,17 @@ struct PoolingLayerInfo;
 
 namespace cpu
 {
-// Forward Declarations
-class CpuPoolingAssemblyDispatch;
 /** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following kernels:
  *
  * -# @ref NEFillBorderKernel (executed if padding size is different from zero)
  * -# @ref kernels::CpuPoolingKernel
- * -# @ref CpuPoolingAssemblyDispatch
+ * -# @ref kernels::CpuPoolingAssemblyWrapperKernel
  */
 class CpuPooling : public ICpuOperator
 {
 public:
     /** Constructor */
-    CpuPooling(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    CpuPooling();
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CpuPooling(const CpuPooling &) = delete;
     /** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -86,16 +83,16 @@ public:
     // Inherited methods overridden:
     void run(ITensorPack &tensors) override;
+    experimental::MemoryRequirements workspace() const override;
 
 private:
-    std::shared_ptr<IMemoryManager> _memory_manager;
-
-    std::unique_ptr<INEKernel>                  _pooling_layer_kernel;
-    std::unique_ptr<INEKernel>                  _border_handler;
-    std::unique_ptr<CpuPoolingAssemblyDispatch> _asm_glue;
+    std::unique_ptr<INEKernel> _pooling_layer_kernel;
+    std::unique_ptr<INEKernel> _border_handler;
+    std::unique_ptr<INEKernel> _asm_glue;
 
-    bool       _is_global_pooling_layer;
-    DataLayout _data_layout;
+    bool                             _is_global_pooling_layer;
+    DataLayout                       _data_layout;
+    experimental::MemoryRequirements _mem_req;
 };
 } // namespace cpu
 } // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.cpp b/src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.cpp
deleted file mode 100644
index e4526c5bd3..0000000000
--- a/src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.h"
-
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "src/core/CPP/Validate.h"
-#include "src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h"
-
-namespace arm_compute
-{
-namespace cpu
-{
-CpuPoolingAssemblyDispatch::CpuPoolingAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)),
-      _workspace(),
-      _is_global_pooling_layer(false)
-{
-}
-
-CpuPoolingAssemblyDispatch::~CpuPoolingAssemblyDispatch() = default;
-
-void CpuPoolingAssemblyDispatch::configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info)
-{
-    const CPUInfo     &ci          = NEScheduler::get().cpu_info();
-    const unsigned int num_threads = NEScheduler::get().num_threads();
-
-    // If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
-    if(!CpuPoolingAssemblyDispatch::validate(src, dst, info))
-    {
-        return;
-    }
-
-    auto pooling_wrapper = std::make_unique<kernels::CpuPoolingAssemblyWrapperKernel>();
-    ARM_COMPUTE_ERROR_ON(pooling_wrapper == nullptr);
-    pooling_wrapper->configure(src, dst, info, ci);
-
-    // Check if we have Global Pooling Layer
-    _is_global_pooling_layer = (src->dimension(2) == info.pool_size.width) && (src->dimension(1) == info.pool_size.height);
-
-    // Allocate workspace based on kernel's memory requirements
-    constexpr size_t alignment      = 4096;
-    const size_t     workspace_size = pooling_wrapper->get_working_size(num_threads);
-    _workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment) }, 1, DataType::S8), alignment);
-    _memory_group.manage(&_workspace);
-    _workspace.allocator()->allocate();
-
-    _kernel = std::move(pooling_wrapper);
-}
-
-Status CpuPoolingAssemblyDispatch::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info)
-{
-    return kernels::CpuPoolingAssemblyWrapperKernel::validate(src, dst, info);
-}
-
-bool CpuPoolingAssemblyDispatch::is_configured() const
-{
-    return _kernel != nullptr;
-}
-
-void CpuPoolingAssemblyDispatch::run(ITensorPack &tensors)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No srcs provided");
-
-    tensors.add_tensor(TensorType::ACL_DST_1, &_workspace);
-
-    if(_is_global_pooling_layer)
-    {
-        NEScheduler::get().schedule_op(_kernel.get(), Window::DimX, _kernel->window(), tensors);
-    }
-    else
-    {
-        NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors);
-    }
-}
-} // namespace cpu
-} // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.h b/src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.h
deleted file mode 100644
index 353bbe1a78..0000000000
--- a/src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPU_POOLING_ASSEMBLY_DISPATCH_H
-#define ARM_COMPUTE_CPU_POOLING_ASSEMBLY_DISPATCH_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "src/runtime/cpu/ICpuOperator.h"
-
-namespace arm_compute
-{
-namespace cpu
-{
-class ITensor;
-
-/** Basic function to run pooling assembly kernels */
-class CpuPoolingAssemblyDispatch : public ICpuOperator
-{
-public:
-    /** Constructor */
-    CpuPoolingAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-    /** Prevent instances of this class from being copied */
-    CpuPoolingAssemblyDispatch(const CpuPoolingAssemblyDispatch &) = delete;
-    /** Default move constructor */
-    CpuPoolingAssemblyDispatch(CpuPoolingAssemblyDispatch &&) = default;
-    /** Prevent instances of this class from being copied */
-    CpuPoolingAssemblyDispatch &operator=(const CpuPoolingAssemblyDispatch &) = delete;
-    /** Default move assignment operator */
-    CpuPoolingAssemblyDispatch &operator=(CpuPoolingAssemblyDispatch &&) = default;
-    /** Destructor */
-    ~CpuPoolingAssemblyDispatch();
-
-    /** If supported create an assembly routine, else fallback to Compute Library function.
-     *
-     * @param[in]  src  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[out] dst  Destination tensor info to store the result of pooling. Data types supported: same as @p src.
-     * @param[in]  info Pooling meta-data
-     */
-    void configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info);
-
-    /** Indicates whether or not this function can be used to process the given parameters.
-     *
-     * @param[in] src  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] dst  Destination tensor to store the result of pooling. Data types supported: same as @p src.
-     * @param[in] info Pooling meta-data
-     *
-     * @return a status.
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info);
-    /** Was the function successfully configured ?
-     *
-     * @return True if the function is configured and ready to run
-     */
-    bool is_configured() const;
-    // Run method overriden
-    void run(ITensorPack &tensors) override;
-
-private:
-    arm_compute::MemoryGroup _memory_group;
-
-    arm_compute::Tensor _workspace;
-    bool                _is_global_pooling_layer;
-};
-} // namespace cpu
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_POOLING_ASSEMBLY_DISPATCH_H */
--
cgit v1.2.1
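
For context, below is a minimal caller-side sketch of the pattern this patch establishes: the operator reports its scratch-memory needs through workspace(), and the runtime layer owns the allocation and hands the buffer back at run time through the tensor pack. The helper function names (configure_with_workspace, run_with_workspace) are illustrative only and not part of the patch; the API calls simply mirror the NEPoolingLayer changes above.

#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/runtime/cpu/operators/CpuPooling.h"

using namespace arm_compute;

// Illustrative helper (not part of the patch): configure the operator, then let the
// runtime side allocate and manage the workspace it asks for.
void configure_with_workspace(cpu::CpuPooling &op, MemoryGroup &memory_group, Tensor &workspace,
                              ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info)
{
    op.configure(src, dst, pool_info, nullptr);

    // Query the operator's memory requirements and allocate the workspace at the
    // runtime level, as NEPoolingLayer::configure() now does.
    const experimental::MemoryRequirements mem_req = op.workspace();
    if(!mem_req.empty())
    {
        workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size + mem_req[0].alignment }, 1, DataType::S8), mem_req[0].alignment);
        memory_group.manage(&workspace);
        workspace.allocator()->allocate();
    }
}

// Illustrative helper (not part of the patch): pass the externally owned workspace
// back to the operator at run time under the ACL_INT_0 slot.
void run_with_workspace(cpu::CpuPooling &op, ITensor *src, ITensor *dst, ITensor *workspace)
{
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC, src);
    pack.add_tensor(TensorType::ACL_DST_0, dst);
    pack.add_tensor(TensorType::ACL_INT_0, workspace);
    op.run(pack);
}

Keeping allocation out of the operator leaves CpuPooling stateless with respect to memory ownership, which is the point of the change: the runtime wrapper (or any other caller) decides how workspace memory is grouped and reused.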