From ec2256b81e6d6f655dcfbc76683738fbfeb82bcc Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 3 Dec 2020 18:51:58 +0000 Subject: Move NEGEMMAssemblyDispatch as an internal function Signed-off-by: Georgios Pinitas Change-Id: I89ee26c1595d510c5048904cae9422528b76cd45 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4662 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/runtime/NEON/functions/NEGEMM.cpp | 18 +-- .../NEON/functions/NEGEMMAssemblyDispatch.cpp | 2 +- .../NEON/functions/NEGEMMAssemblyDispatch.h | 125 +++++++++++++++++++++ src/runtime/NEON/functions/NEGEMMConv2d.cpp | 14 ++- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 27 ++--- .../NEON/functions/NEWinogradConvolutionLayer.cpp | 2 +- 6 files changed, 160 insertions(+), 28 deletions(-) create mode 100644 src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h (limited to 'src') diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 03f5aa37c1..6d83480cb9 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" @@ -39,6 +38,7 @@ #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include @@ -61,7 +61,7 @@ AsmGemmInfo init_assembly_metadata(const GEMMInfo &info) } // namespace NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), 
_weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(), + : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique<NEGEMMAssemblyDispatch>()), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false) { @@ -90,8 +90,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe if(run_optimised) { const ITensor *c_to_use = is_c_bias ? c : nullptr; - _asm_glue.configure(a, b, c_to_use, d, asm_info); - ARM_COMPUTE_ERROR_ON(!_asm_glue.is_configured()); + _asm_glue->configure(a, b, c_to_use, d, asm_info); + ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured()); // Scale product by alpha if(_run_alpha_scale) @@ -312,9 +312,9 @@ void NEGEMM::run() MemoryGroupResourceScope scope_mg(_memory_group); - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { - _asm_glue.run(); + _asm_glue->run(); if(_run_alpha_scale) { _alpha_scale_func.run(); @@ -361,20 +361,20 @@ void NEGEMM::prepare() if(!_is_prepared) { const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b); - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue.prepare(); + _asm_glue->prepare(); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); } } - else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured()) + else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured()) { 
if(!original_b_managed_by_weights_manager) { diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index b54389cf5f..1c86393406 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/CPP/Validate.h" diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h new file mode 100644 index 0000000000..466e60183a --- /dev/null +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_NEGEMMASSEMBLYDISPATCH_H +#define SRC_NEGEMMASSEMBLYDISPATCH_H + +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/* Convolution method supported by the assembly gemm interface */ +enum class AsmConvMethod +{ + Im2Col, + Indirect, + Conv +}; + +struct AsmGemmInfo +{ + AsmConvMethod method{ AsmConvMethod::Im2Col }; + PadStrideInfo ps_info{}; + ActivationLayerInfo activation_info{}; + GEMMLowpOutputStageInfo output_stage{}; + bool negated_offsets{ true }; + bool reinterpret_input_as_3d{ false }; + bool depth_output_gemm3d{ false }; + int64_t padding_top{ 0 }; + int64_t padding_left{ 0 }; + float padding_value{ 0.f }; +}; + +/** Assembly kernel glue */ +class NEGEMMAssemblyDispatch : public IFunction +{ +public: + /** Constructor */ + NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + /** Prevent instances of this class from being copy constructed */ + NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete; + /** Prevent instances of this class from being copied */ + NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete; + NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default; + NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default; + ~NEGEMMAssemblyDispatch() = default; + + class IFallback + { + public: + virtual void run() = 0; + virtual void prepare() = 0; + virtual bool is_configured() const = 0; + virtual ~IFallback() = default; + }; + +public: + /** If 
supported create a Compute Library function else fallback to the arm_gemm function. + * + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations + * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] info GEMM meta-data + */ + void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const AsmGemmInfo &info); + + /** Indicates whether or not this function can be used to process the given parameters. + * + * @param[in] a Input tensor info (Matrix A) + * @param[in] b Input tensor info (Matrix B) + * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations + * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] info GEMM meta-data + * + * @return a status. + */ + static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info); + /** Checks if activation is supported by the gemm assembly dispatcher + * + * @param[in] activation Activation to check + * + * @return True if activation is supported else false + */ + static bool is_activation_supported(const ActivationLayerInfo &activation); + /** Was the function successfully configured ? 
+ * + * @return True if the function is configured and ready to run + */ + bool is_configured() const; + + // Inherited methods overridden: + void prepare() override; + void run() override; + +private: + std::unique_ptr<IFallback> _arm_gemm; /** Interface for the arm_gemm fallback */ + MemoryGroup _memory_group; /**< Function memory group */ + IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ +}; +} // namespace arm_compute +#endif /* SRC_NEGEMMASSEMBLYDISPATCH_H */ diff --git a/src/runtime/NEON/functions/NEGEMMConv2d.cpp b/src/runtime/NEON/functions/NEGEMMConv2d.cpp index 860b6bb4e1..b8349d98db 100644 --- a/src/runtime/NEON/functions/NEGEMMConv2d.cpp +++ b/src/runtime/NEON/functions/NEGEMMConv2d.cpp @@ -22,9 +22,11 @@ * SOFTWARE. */ #include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" + #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include @@ -81,9 +83,13 @@ AsmGemmInfo init_assembly_metadata(const Conv2dInfo &info, bool is_indirect) } // namespace NEGEMMConv2d::NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager) - : _gemm_asm_func(memory_manager), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), _run_activation(false) + : _gemm_asm_func(std::make_unique<NEGEMMAssemblyDispatch>(memory_manager)), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), + _run_activation(false) { } + +NEGEMMConv2d::~NEGEMMConv2d() = default; + void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -101,10 +107,10 @@ void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITens { asm_info.output_stage = 
calculate_output_stage_metadata(input->info(), weights->info(), output->info(), info.act_info); } - _gemm_asm_func.configure(input, &_permuted_weights, biases, output, asm_info); + _gemm_asm_func->configure(input, &_permuted_weights, biases, output, asm_info); // Configure activation - if(info.act_info.enabled() && !_gemm_asm_func.is_activation_supported(info.act_info)) + if(info.act_info.enabled() && !_gemm_asm_func->is_activation_supported(info.act_info)) { _activation_func.configure(output, nullptr, info.act_info); _run_activation = true; @@ -150,7 +156,7 @@ void NEGEMMConv2d::run() { prepare(); - _gemm_asm_func.run(); + _gemm_asm_func->run(); if(_run_activation) { _activation_func.run(); diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 50c7fe4c66..921626f0fe 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -42,6 +42,7 @@ #include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" #include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" namespace arm_compute { @@ -65,10 +66,10 @@ using namespace arm_compute::misc::shape_calculator; NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default; NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(), - _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(), - _convert_from_signed_asymm(), _vector_sum_col(), 
_vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), _b_offset(0), - _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false), + : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(std::make_unique<NEGEMMAssemblyDispatch>(memory_manager, weights_manager)), _mm_kernel(), _mtx_a_reshape_kernel(), + _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), + _convert_to_signed_asymm(), _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), + _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false), _run_activation(false), _flip_signedness(false) { } @@ -145,14 +146,14 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, { if(is_data_type_quantized_asymmetric(a_to_use->info()->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { - _asm_glue.configure(a_to_use, b, c, output, asm_info); - _fused_assembly_path = _asm_glue.is_configured(); + _asm_glue->configure(a_to_use, b, c, output, asm_info); + _fused_assembly_path = _asm_glue->is_configured(); } else { - _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, asm_info); + _asm_glue->configure(a_to_use, b, nullptr, _fuse_output_stage ? 
&_mm_result_s32 : output, asm_info); } - _assembly_path = _asm_glue.is_configured(); + _assembly_path = _asm_glue->is_configured(); break; } default: @@ -510,9 +511,9 @@ void NEGEMMLowpMatrixMultiplyCore::run() } // Run GEMM - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { - _asm_glue.run(); + _asm_glue->run(); } else { @@ -575,21 +576,21 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() { const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b); // Run assembly reshape - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue.prepare(); + _asm_glue->prepare(); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); } } // Run non-assembly reshape - else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured()) + else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 265df9246f..bd3bdd6a26 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -28,13 +28,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include 
"src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/NEON/kernels/convolution/common/utils.hpp" #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" -- cgit v1.2.1