From b55f8e848a841e4d75fce0e8324c23c3876d2f71 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 22 Jul 2021 11:23:11 +0100 Subject: Port NEConvolutionLayer Resolves: COMPMID-4507 Change-Id: I9557026ec0052b5585994f7a1300a14565c976d0 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5964 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 221 ++++++---------------- 1 file changed, 59 insertions(+), 162 deletions(-) (limited to 'src/runtime/NEON/functions/NEConvolutionLayer.cpp') diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index ade717805d..0239514b17 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -26,25 +26,38 @@ #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" - -#include -#include -#include +#include "src/core/helpers/MemoryHelpers.h" +#include "src/runtime/cpu/operators/CpuConv2d.h" +#include "src/runtime/cpu/operators/CpuDirectConv2d.h" +#include "src/runtime/cpu/operators/CpuGemmConvolution.h" +#include "src/runtime/cpu/operators/CpuGemmDirectConv2d.h" +#include "src/runtime/cpu/operators/CpuWinogradConv2d.h" namespace arm_compute { -NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_manager) //NOLINT - : _memory_manager(std::move(memory_manager)), - _function() +using namespace arm_compute::experimental; + +struct NEConvolutionLayer::Impl +{ + MemoryGroup memory_group{}; + std::shared_ptr memory_manager{}; + std::unique_ptr op{ nullptr }; + ITensorPack run_pack{}; + ITensorPack prep_pack{}; + WorkspaceData workspace{}; + experimental::MemoryRequirements aux_mem_req{}; + std::unique_ptr func{ nullptr }; +}; + +NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_manager) + : _impl(std::make_unique()) { + _impl->memory_manager = std::move(memory_manager); } +NEConvolutionLayer::~NEConvolutionLayer() = default; + void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { @@ -55,206 +68,90 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const enable_fast_math, num_groups)); const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups); - switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math)) + switch(cpu::CpuConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math)) { case ConvolutionMethod::WINOGRAD: - { - auto f = std::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math); - _function = std::move(f); - break; - } case ConvolutionMethod::GEMM: - { - auto f = std::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math); - _function = std::move(f); - break; - } case ConvolutionMethod::GEMM_CONV2D: - { - auto f = std::make_unique(_memory_manager); - f->configure(input, weights, biases, output, info); - _function = std::move(f); - break; - } case ConvolutionMethod::DIRECT: { - auto f = std::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, act_info); - _function = std::move(f); + auto f = std::make_unique(); + f->configure(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups); + _impl->op = std::move(f); break; } case ConvolutionMethod::FFT: { - auto f = std::make_unique(_memory_manager); + auto f = std::make_unique(_impl->memory_manager); f->configure(input, weights, biases, output, conv_info, act_info); - _function = std::move(f); + _impl->func = std::move(f); break; } default: ARM_COMPUTE_ERROR("Not supported."); break; } + + if(_impl->op) + { + _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager)); + _impl->aux_mem_req = _impl->op->workspace(); + _impl->run_pack = { { ACL_SRC_0, input }, { ACL_SRC_1, weights }, { ACL_SRC_2, biases }, { ACL_DST, output } }; + _impl->prep_pack = { { ACL_SRC_1, weights }, { ACL_SRC_2, biases } }; + _impl->workspace = manage_workspace(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack); + } } Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { - ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on Neon"); - const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups); - switch(NEConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math)) + switch(cpu::CpuConv2d::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math)) { case ConvolutionMethod::WINOGRAD: - ARM_COMPUTE_RETURN_ON_ERROR(NEWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math)); - break; case ConvolutionMethod::GEMM: - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math)); - break; case ConvolutionMethod::GEMM_CONV2D: - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConv2d::validate(input, weights, biases, output, info)); - break; case ConvolutionMethod::DIRECT: - ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info)); + ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuConv2d::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups)); break; case ConvolutionMethod::FFT: - ARM_COMPUTE_RETURN_ON_ERROR(NEFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEFFTConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info)); break; default: ARM_COMPUTE_ERROR("Not supported."); break; } - return Status{}; } -ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) +void NEConvolutionLayer::run() { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights); - ARM_COMPUTE_UNUSED(weights_info); - - const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); - - const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, 1); - - /* Input spatial dims, kernel size, IFM/OFM, conv info*/ - using ConvolutionConfiguration = std::tuple; - using ConfigurationMethod = std::pair; + prepare(); - const std::vector known_configs = - { - // Alexnet - ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U)), ConvolutionMethod::GEMM), - // VGG16 / VGG19 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), ConvolutionMethod::GEMM), - // Mobilenet 224 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM), - // Mobilenet 160 - ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM) - }; + MemoryGroupResourceScope scope_mg(_impl->memory_group); - const auto find_config = [&](ConfigurationMethod c) + if(_impl->func) { - const ConvolutionConfiguration config = c.first; - const PadStrideInfo info = std::get<3>(config); - - return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) - && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() - && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride(); - }; - - std::vector::const_iterator found; - if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end()) + _impl->func->run(); + } + else { - return (*found).second; + _impl->op->run(_impl->run_pack); } +} - if(dilation != Size2D(1U, 1U)) +void NEConvolutionLayer::prepare() +{ + if(_impl->func) { - return ConvolutionMethod::GEMM; + _impl->func->prepare(); } else { - const auto input_layout = input->data_layout(); - // SRGAN - // Output might not be initialized when it is an internal tensor of the layer using the convolution - if(input_layout == DataLayout::NHWC && input->total_size() > 1e7 && (weights->dimension(idx_h) > 7) - && (NEDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) - { - return ConvolutionMethod::DIRECT; - } - if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && (NEFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) - { - return ConvolutionMethod::FFT; - } - if(input->dimension(idx_c) < 16) - { - return ConvolutionMethod::GEMM; - } - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - // This heuristics only applies to F16 data type on A55r1 - if(NEScheduler::get().cpu_info().get_cpu_model() == CPUModel::A55r1 && enable_fast_math && input->data_type() == DataType::F16) - { - // Exclude known bad winograd configs (and defaults to GEMM) - const std::vector known_bad_winograd_f16_with_fastmath_configs = - { - // Squeezenet_V1_1 fire2 and fire3 - ConvolutionConfiguration(Size2D(56U, 56U), Size2D(3U, 3U), Size2D(16U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), - // Squeezenet_V1_1 fire6 and fire7 - ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(48U, 192U), PadStrideInfo(1U, 1U, 1U, 1U)), - // Squeezenet_V1_1 fire8 and fire9 - ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(64U, 256U), PadStrideInfo(1U, 1U, 1U, 1U)), - }; - const auto find_conv_config = [&](ConvolutionConfiguration c) - { - const PadStrideInfo info = std::get<3>(c); - - return std::get<0>(c) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(c) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) - && std::get<2>(c) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() - && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride(); - }; - - bool found_bad = std::find_if(known_bad_winograd_f16_with_fastmath_configs.begin(), known_bad_winograd_f16_with_fastmath_configs.end(), - find_conv_config) - != known_bad_winograd_f16_with_fastmath_configs.end(); - if(found_bad) - { - return ConvolutionMethod::GEMM; - } - } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - // For 1x1 convolutions run the default GEMM - if(weights->dimension(idx_w) == 1 && weights->dimension(idx_h) == 1) - { - return ConvolutionMethod::GEMM; - } + _impl->op->prepare(_impl->prep_pack); - if(bool(NEWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math))) - { - return ConvolutionMethod::WINOGRAD; - } - if(bool(NEGEMMConv2d::validate(input, weights, nullptr, output, info))) - { - return ConvolutionMethod::GEMM_CONV2D; - } - return ConvolutionMethod::GEMM; + // Release temporary tensors that are only used in prepare stage + release_temporaries(_impl->aux_mem_req, _impl->workspace); } } - -void NEConvolutionLayer::run() -{ - prepare(); - _function->run(); -} - -void NEConvolutionLayer::prepare() -{ - _function->prepare(); -} } // namespace arm_compute -- cgit v1.2.1