From ec2256b81e6d6f655dcfbc76683738fbfeb82bcc Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 3 Dec 2020 18:51:58 +0000 Subject: Move NEGEMMAssemblyDispatch as an internal function Signed-off-by: Georgios Pinitas Change-Id: I89ee26c1595d510c5048904cae9422528b76cd45 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4662 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- arm_compute/runtime/NEON/NEFunctions.h | 1 - arm_compute/runtime/NEON/functions/NEGEMM.h | 6 +- .../NEON/functions/NEGEMMAssemblyDispatch.h | 125 --------------------- arm_compute/runtime/NEON/functions/NEGEMMConv2d.h | 19 ++-- .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 5 +- docs/00_introduction.dox | 3 +- src/runtime/NEON/functions/NEGEMM.cpp | 18 +-- .../NEON/functions/NEGEMMAssemblyDispatch.cpp | 2 +- .../NEON/functions/NEGEMMAssemblyDispatch.h | 125 +++++++++++++++++++++ src/runtime/NEON/functions/NEGEMMConv2d.cpp | 14 ++- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 27 ++--- .../NEON/functions/NEWinogradConvolutionLayer.cpp | 2 +- 12 files changed, 180 insertions(+), 167 deletions(-) delete mode 100644 arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h create mode 100644 src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 5ac94102fc..f35144481d 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -75,7 +75,6 @@ #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 645ab56417..124f027227 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -30,17 +30,19 @@ #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" #include namespace arm_compute { +// Forward declarations class NEGEMMInterleave4x4Kernel; class NEGEMMMatrixAdditionKernel; class NEGEMMMatrixMultiplyKernel; class NEGEMMTranspose1xWKernel; +class NEGEMMAssemblyDispatch; + /** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: * * If optimized assembly is available: @@ -112,7 +114,7 @@ private: std::unique_ptr _interleave_kernel; std::unique_ptr _transpose_kernel; std::unique_ptr _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _asm_glue; std::unique_ptr _ma_kernel; NEActivationLayer _alpha_scale_func; NEArithmeticAddition _add_bias; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h deleted file mode 100644 index 8f9498d0f5..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H -#define ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H - -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -/* Convolution method supported by the assembly gemm interface */ -enum class AsmConvMethod -{ - Im2Col, - Indirect, - Conv -}; - -struct AsmGemmInfo -{ - AsmConvMethod method{ AsmConvMethod::Im2Col }; - PadStrideInfo ps_info{}; - ActivationLayerInfo activation_info{}; - GEMMLowpOutputStageInfo output_stage{}; - bool negated_offsets{ true }; - bool reinterpret_input_as_3d{ false }; - bool depth_output_gemm3d{ false }; - int64_t padding_top{ 0 }; - int64_t padding_left{ 0 }; - float padding_value{ 0.f }; -}; - -/** Assembly kernel glue */ -class NEGEMMAssemblyDispatch : public IFunction -{ -public: - /** Constructor */ - NEGEMMAssemblyDispatch(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); - /** Prevent instances of this class from being copy constructed */ - NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete; - /** Prevent instances of this class from being copied */ - NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete; - NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default; - NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default; - ~NEGEMMAssemblyDispatch() = default; - - class IFallback - { - public: - virtual void run() = 0; - virtual void prepare() = 0; - virtual bool is_configured() const = 0; - virtual ~IFallback() = default; - }; - -public: - /** If supported create a Compute Library function else fallback to the arm_gemm function. - * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations - * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] info GEMM meta-data - */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const AsmGemmInfo &info); - - /** Indicates whether or not this function can be used to process the given parameters. 
- * - * @param[in] a Input tensor info (Matrix A) - * @param[in] b Input tensor info (Matrix B) - * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations - * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] info GEMM meta-data - * - * @return a status. - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info); - /** Checks if activation is supported by the gemm assembly dispatcher - * - * @param[in] activation Activation to check - * - * @return True if activation is supported else false - */ - static bool is_activation_supported(const ActivationLayerInfo &activation); - /** Was the function successfully configured ? - * - * @return True if the function is configured and ready to run - */ - bool is_configured() const; - - // Inherited methods overridden: - void prepare() override; - void run() override; - -private: - std::unique_ptr _arm_gemm; /** Interface for the arm_gemm fallback */ - MemoryGroup _memory_group; /**< Function memory group */ - IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h index 7cae39397f..2b3c162eab 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -28,7 +28,6 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/Tensor.h" @@ -37,6 +36,8 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEGEMMAssemblyDispatch; + /** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions: * * Supports only NHWC data layout @@ -60,6 +61,8 @@ public: NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete; /** Default move assignment operator */ NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default; + /** Destructor */ + ~NEGEMMConv2d(); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -96,13 +99,13 @@ public: void prepare() override; private: - NEGEMMAssemblyDispatch _gemm_asm_func; - NEActivationLayer _activation_func; - NEPermute _weights_permute_func; - const ITensor *_original_weights; - Tensor _permuted_weights; - bool _is_prepared; - bool _run_activation; + std::unique_ptr _gemm_asm_func; + NEActivationLayer _activation_func; + NEPermute _weights_permute_func; + const ITensor *_original_weights; + Tensor _permuted_weights; + bool _is_prepared; + bool _run_activation; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEGEMMCONV2D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index cb1d6bd782..8eea9d7d24 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -27,8 +27,8 @@ #include "NEActivationLayer.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" #include @@ -45,6 +45,7 @@ class NEGEMMLowpOffsetContributionOutputStageKernel; class NEGEMMLowpMatrixAReductionKernel; class NEGEMMLowpMatrixBReductionKernel; class NEGEMMTranspose1xWKernel; +class NEGEMMAssemblyDispatch; /** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available: * @@ -115,7 +116,7 @@ public: private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _asm_glue; std::unique_ptr _mm_kernel; std::unique_ptr _mtx_a_reshape_kernel; std::unique_ptr _mtx_b_reshape_kernel; diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 7ad4831082..ecdd72c436 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -95,6 +95,7 @@ v21.02 Public major release - NEGEMMInterleave4x4 - NEGEMMTranspose1xW - NEComputeAllAnchors / CLComputeAllAnchors + - NEGEMMAssemblyDispatch - Removed kernels: - NEGEMMMatrixVectorMultiplyKernel - NELocallyConnectedMatrixMultiplyKernel / CLLocallyConnectedMatrixMultiplyKernel @@ -486,7 +487,7 @@ v20.05 Public major release - @ref NEDepthConvertLayerKernel - @ref NEDepthConvertLayer - @ref NEGEMMConvolutionLayer - - @ref NEGEMMAssemblyDispatch + - NEGEMMAssemblyDispatch - Added new data type QASYMM8_SIGNED support for: - @ref CLDirectConvolutionLayer - @ref CLDeconvolutionLayer diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 03f5aa37c1..6d83480cb9 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" @@ -39,6 +38,7 @@ #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include 
"src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include @@ -61,7 +61,7 @@ AsmGemmInfo init_assembly_metadata(const GEMMInfo &info) } // namespace NEGEMM::NEGEMM(std::shared_ptr memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(), + : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique()), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false) { @@ -90,8 +90,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe if(run_optimised) { const ITensor *c_to_use = is_c_bias ? c : nullptr; - _asm_glue.configure(a, b, c_to_use, d, asm_info); - ARM_COMPUTE_ERROR_ON(!_asm_glue.is_configured()); + _asm_glue->configure(a, b, c_to_use, d, asm_info); + ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured()); // Scale product by alpha if(_run_alpha_scale) @@ -312,9 +312,9 @@ void NEGEMM::run() MemoryGroupResourceScope scope_mg(_memory_group); - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { - _asm_glue.run(); + _asm_glue->run(); if(_run_alpha_scale) { _alpha_scale_func.run(); @@ -361,20 +361,20 @@ void NEGEMM::prepare() if(!_is_prepared) { const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b); - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue.prepare(); + _asm_glue->prepare(); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); } } - else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured()) + else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index b54389cf5f..1c86393406 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/CPP/Validate.h" diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h new file mode 100644 index 0000000000..466e60183a --- /dev/null +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_NEGEMMASSEMBLYDISPATCH_H +#define SRC_NEGEMMASSEMBLYDISPATCH_H + +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/* Convolution method supported by the assembly gemm interface */ +enum class AsmConvMethod +{ + Im2Col, + Indirect, + Conv +}; + +struct AsmGemmInfo +{ + AsmConvMethod method{ AsmConvMethod::Im2Col }; + PadStrideInfo ps_info{}; + ActivationLayerInfo activation_info{}; + GEMMLowpOutputStageInfo output_stage{}; + bool negated_offsets{ true }; + bool reinterpret_input_as_3d{ false }; + bool depth_output_gemm3d{ false }; + int64_t padding_top{ 0 }; + int64_t padding_left{ 0 }; + float padding_value{ 0.f }; +}; + +/** Assembly kernel glue */ +class NEGEMMAssemblyDispatch : public IFunction +{ +public: + /** Constructor */ + NEGEMMAssemblyDispatch(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + /** Prevent instances of this class from being copy constructed */ + NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete; + /** Prevent instances of this class from being copied */ + NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete; + NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default; + NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default; + ~NEGEMMAssemblyDispatch() = default; + + class IFallback + { + public: + virtual void run() = 0; + virtual void prepare() = 0; + virtual bool is_configured() const = 0; + virtual ~IFallback() = default; + }; + +public: + /** If supported create a Compute Library function else fallback to the arm_gemm function. + * + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[in] c Input tensor (Matrix C) used to pass the bias for quantized calculations + * @param[out] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] info GEMM meta-data + */ + void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const AsmGemmInfo &info); + + /** Indicates whether or not this function can be used to process the given parameters. 
+ * + * @param[in] a Input tensor info (Matrix A) + * @param[in] b Input tensor info (Matrix B) + * @param[in] c Input tensor info (Matrix C) used to pass the bias for quantized calculations + * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] info GEMM meta-data + * + * @return a status. + */ + static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info); + /** Checks if activation is supported by the gemm assembly dispatcher + * + * @param[in] activation Activation to check + * + * @return True if activation is supported else false + */ + static bool is_activation_supported(const ActivationLayerInfo &activation); + /** Was the function successfully configured ? + * + * @return True if the function is configured and ready to run + */ + bool is_configured() const; + + // Inherited methods overridden: + void prepare() override; + void run() override; + +private: + std::unique_ptr _arm_gemm; /** Interface for the arm_gemm fallback */ + MemoryGroup _memory_group; /**< Function memory group */ + IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ +}; +} // namespace arm_compute +#endif /* SRC_NEGEMMASSEMBLYDISPATCH_H */ diff --git a/src/runtime/NEON/functions/NEGEMMConv2d.cpp b/src/runtime/NEON/functions/NEGEMMConv2d.cpp index 860b6bb4e1..b8349d98db 100644 --- a/src/runtime/NEON/functions/NEGEMMConv2d.cpp +++ b/src/runtime/NEON/functions/NEGEMMConv2d.cpp @@ -22,9 +22,11 @@ * SOFTWARE. */ #include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" + #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include @@ -81,9 +83,13 @@ AsmGemmInfo init_assembly_metadata(const Conv2dInfo &info, bool is_indirect) } // namespace NEGEMMConv2d::NEGEMMConv2d(const std::shared_ptr &memory_manager) - : _gemm_asm_func(memory_manager), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), _run_activation(false) + : _gemm_asm_func(std::make_unique(memory_manager)), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), + _run_activation(false) { } + +NEGEMMConv2d::~NEGEMMConv2d() = default; + void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -101,10 +107,10 @@ void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITens { asm_info.output_stage = calculate_output_stage_metadata(input->info(), weights->info(), output->info(), info.act_info); } - _gemm_asm_func.configure(input, &_permuted_weights, biases, output, asm_info); + _gemm_asm_func->configure(input, &_permuted_weights, biases, output, asm_info); // Configure activation - if(info.act_info.enabled() && !_gemm_asm_func.is_activation_supported(info.act_info)) + if(info.act_info.enabled() && !_gemm_asm_func->is_activation_supported(info.act_info)) { _activation_func.configure(output, nullptr, info.act_info); _run_activation = true; @@ -150,7 +156,7 @@ void NEGEMMConv2d::run() { prepare(); - _gemm_asm_func.run(); + _gemm_asm_func->run(); if(_run_activation) { _activation_func.run(); diff --git 
a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 50c7fe4c66..921626f0fe 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -42,6 +42,7 @@ #include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" #include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" namespace arm_compute { @@ -65,10 +66,10 @@ using namespace arm_compute::misc::shape_calculator; NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default; NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(), - _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(), - _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), _b_offset(0), - _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false), + : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(std::make_unique(memory_manager, weights_manager)), _mm_kernel(), _mtx_a_reshape_kernel(), + _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), + _convert_to_signed_asymm(), _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), + _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false), _run_activation(false), _flip_signedness(false) { } @@ -145,14 +146,14 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, { if(is_data_type_quantized_asymmetric(a_to_use->info()->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { - _asm_glue.configure(a_to_use, b, c, output, asm_info); - _fused_assembly_path = _asm_glue.is_configured(); + _asm_glue->configure(a_to_use, b, c, output, asm_info); + _fused_assembly_path = _asm_glue->is_configured(); } else { - _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, asm_info); + _asm_glue->configure(a_to_use, b, nullptr, _fuse_output_stage ? 
&_mm_result_s32 : output, asm_info); } - _assembly_path = _asm_glue.is_configured(); + _assembly_path = _asm_glue->is_configured(); break; } default: @@ -510,9 +511,9 @@ void NEGEMMLowpMatrixMultiplyCore::run() } // Run GEMM - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { - _asm_glue.run(); + _asm_glue->run(); } else { @@ -575,21 +576,21 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() { const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b); // Run assembly reshape - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue.prepare(); + _asm_glue->prepare(); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); } } // Run non-assembly reshape - else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured()) + else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 265df9246f..bd3bdd6a26 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -28,13 +28,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/NEON/kernels/convolution/common/utils.hpp" #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" -- cgit v1.2.1
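
Note (editorial illustration, not part of the patch): the change above applies a standard "hide the implementation type" pattern — the public headers now only forward-declare NEGEMMAssemblyDispatch and hold it through std::unique_ptr, which is also why NEGEMMConv2d gains an out-of-line destructor (a defaulted destructor must be compiled where the forward-declared type is complete). Below is a minimal, self-contained C++ sketch of that pattern, collapsed into one file for brevity; the names Gemm and AssemblyDispatch are illustrative stand-ins, not Compute Library API.

// ---- public header (only a forward declaration is visible to clients) ----
#include <memory>

class AssemblyDispatch; // hypothetical internal type, defined in a source file

class Gemm
{
public:
    Gemm();
    ~Gemm(); // declared here, defined where AssemblyDispatch is a complete type

    void run();

private:
    std::unique_ptr<AssemblyDispatch> _asm_glue;
};

// ---- source file (the only translation unit that needs the full type) ----
#include <iostream>

class AssemblyDispatch
{
public:
    void run()
    {
        std::cout << "assembly dispatch running\n";
    }
};

Gemm::Gemm() : _asm_glue(std::make_unique<AssemblyDispatch>())
{
}

Gemm::~Gemm() = default; // AssemblyDispatch is complete here, so the defaulted dtor compiles

void Gemm::run()
{
    _asm_glue->run();
}

int main()
{
    Gemm g;
    g.run(); // clients build against the header without ever seeing AssemblyDispatch
    return 0;
}

Under the same reading of the diff, code that previously included arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h directly now has to go through NEGEMM, NEGEMMConv2d or NEGEMMLowpMatrixMultiplyCore instead, since the header has moved under src/ and is no longer installed as public API.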