diff options
-rw-r--r-- | arm_compute/runtime/NEON/NEFunctions.h | 1 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMM.h | 6 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMMConv2d.h | 19 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 5 | ||||
-rw-r--r-- | docs/00_introduction.dox | 3 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 18 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp | 2 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h (renamed from arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h) | 6 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConv2d.cpp | 14 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 27 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp | 2 |
11 files changed, 58 insertions, 45 deletions
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 5ac94102fc..f35144481d 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -75,7 +75,6 @@ #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 645ab56417..124f027227 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -30,17 +30,19 @@ #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { +// Forward declarations class NEGEMMInterleave4x4Kernel; class NEGEMMMatrixAdditionKernel; class NEGEMMMatrixMultiplyKernel; class NEGEMMTranspose1xWKernel; +class NEGEMMAssemblyDispatch; + /** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: * * If optimized assembly is available: @@ -112,7 +114,7 @@ private: std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue; std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; NEActivationLayer _alpha_scale_func; NEArithmeticAddition _add_bias; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h index 7cae39397f..2b3c162eab 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -28,7 +28,6 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/Tensor.h" @@ -37,6 +36,8 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEGEMMAssemblyDispatch; + /** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions: * * Supports only NHWC data layout @@ -60,6 +61,8 @@ public: NEGEMMConv2d &operator=(const NEGEMMConv2d &) = delete; /** Default move assignment operator */ NEGEMMConv2d &operator=(NEGEMMConv2d &&) = default; + /** Destructor */ + ~NEGEMMConv2d(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -96,13 +99,13 @@ public: void prepare() override; private: - NEGEMMAssemblyDispatch _gemm_asm_func; - NEActivationLayer _activation_func; - NEPermute _weights_permute_func; - const ITensor *_original_weights; - Tensor _permuted_weights; - bool _is_prepared; - bool _run_activation; + std::unique_ptr<NEGEMMAssemblyDispatch> _gemm_asm_func; + NEActivationLayer _activation_func; + NEPermute _weights_permute_func; + const ITensor *_original_weights; + Tensor _permuted_weights; + bool _is_prepared; + bool _run_activation; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEGEMMCONV2D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index cb1d6bd782..8eea9d7d24 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -27,8 +27,8 @@ #include "NEActivationLayer.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -45,6 +45,7 @@ class NEGEMMLowpOffsetContributionOutputStageKernel; class NEGEMMLowpMatrixAReductionKernel; class NEGEMMLowpMatrixBReductionKernel; class NEGEMMTranspose1xWKernel; +class NEGEMMAssemblyDispatch; /** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available: * @@ -115,7 +116,7 @@ public: private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue; std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel; std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel; std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel; diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 7ad4831082..ecdd72c436 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -95,6 +95,7 @@ v21.02 Public major release - NEGEMMInterleave4x4 - NEGEMMTranspose1xW - NEComputeAllAnchors / CLComputeAllAnchors + - NEGEMMAssemblyDispatch - Removed kernels: - NEGEMMMatrixVectorMultiplyKernel - NELocallyConnectedMatrixMultiplyKernel / CLLocallyConnectedMatrixMultiplyKernel @@ -486,7 +487,7 @@ v20.05 Public major release - @ref NEDepthConvertLayerKernel - @ref NEDepthConvertLayer - @ref NEGEMMConvolutionLayer - - @ref NEGEMMAssemblyDispatch + - NEGEMMAssemblyDispatch - Added new data type QASYMM8_SIGNED support for: - @ref CLDirectConvolutionLayer - @ref CLDeconvolutionLayer diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 03f5aa37c1..6d83480cb9 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" @@ -39,6 +38,7 @@ #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include <cmath> @@ -61,7 +61,7 @@ AsmGemmInfo init_assembly_metadata(const GEMMInfo &info) } // namespace NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(), + : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique<NEGEMMAssemblyDispatch>()), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false) { @@ -90,8 +90,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe if(run_optimised) { const ITensor *c_to_use = is_c_bias ? c : nullptr; - _asm_glue.configure(a, b, c_to_use, d, asm_info); - ARM_COMPUTE_ERROR_ON(!_asm_glue.is_configured()); + _asm_glue->configure(a, b, c_to_use, d, asm_info); + ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured()); // Scale product by alpha if(_run_alpha_scale) @@ -312,9 +312,9 @@ void NEGEMM::run() MemoryGroupResourceScope scope_mg(_memory_group); - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { - _asm_glue.run(); + _asm_glue->run(); if(_run_alpha_scale) { _alpha_scale_func.run(); @@ -361,20 +361,20 @@ void NEGEMM::prepare() if(!_is_prepared) { const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b); - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue.prepare(); + _asm_glue->prepare(); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); } } - else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured()) + else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index b54389cf5f..1c86393406 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/CPP/Validate.h" diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h index 8f9498d0f5..466e60183a 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H -#define ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H +#ifndef SRC_NEGEMMASSEMBLYDISPATCH_H +#define SRC_NEGEMMASSEMBLYDISPATCH_H #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -122,4 +122,4 @@ private: IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H */ +#endif /* SRC_NEGEMMASSEMBLYDISPATCH_H */ diff --git a/src/runtime/NEON/functions/NEGEMMConv2d.cpp b/src/runtime/NEON/functions/NEGEMMConv2d.cpp index 860b6bb4e1..b8349d98db 100644 --- a/src/runtime/NEON/functions/NEGEMMConv2d.cpp +++ b/src/runtime/NEON/functions/NEGEMMConv2d.cpp @@ -22,9 +22,11 @@ * SOFTWARE. */ #include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h" + #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include <set> @@ -81,9 +83,13 @@ AsmGemmInfo init_assembly_metadata(const Conv2dInfo &info, bool is_indirect) } // namespace NEGEMMConv2d::NEGEMMConv2d(const std::shared_ptr<IMemoryManager> &memory_manager) - : _gemm_asm_func(memory_manager), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), _run_activation(false) + : _gemm_asm_func(std::make_unique<NEGEMMAssemblyDispatch>(memory_manager)), _activation_func(), _weights_permute_func(), _original_weights(nullptr), _permuted_weights(), _is_prepared(false), + _run_activation(false) { } + +NEGEMMConv2d::~NEGEMMConv2d() = default; + void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const Conv2dInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -101,10 +107,10 @@ void NEGEMMConv2d::configure(ITensor *input, const ITensor *weights, const ITens { asm_info.output_stage = calculate_output_stage_metadata(input->info(), weights->info(), output->info(), info.act_info); } - _gemm_asm_func.configure(input, &_permuted_weights, biases, output, asm_info); + _gemm_asm_func->configure(input, &_permuted_weights, biases, output, asm_info); // Configure activation - if(info.act_info.enabled() && !_gemm_asm_func.is_activation_supported(info.act_info)) + if(info.act_info.enabled() && !_gemm_asm_func->is_activation_supported(info.act_info)) { _activation_func.configure(output, nullptr, info.act_info); _run_activation = true; @@ -150,7 +156,7 @@ void NEGEMMConv2d::run() { prepare(); - _gemm_asm_func.run(); + _gemm_asm_func->run(); if(_run_activation) { _activation_func.run(); diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 50c7fe4c66..921626f0fe 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -42,6 +42,7 @@ #include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" #include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" namespace arm_compute { @@ -65,10 +66,10 @@ using namespace arm_compute::misc::shape_calculator; NEGEMMLowpMatrixMultiplyCore::~NEGEMMLowpMatrixMultiplyCore() = default; NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(memory_manager, weights_manager), _mm_kernel(), _mtx_a_reshape_kernel(), _mtx_b_reshape_kernel(), - _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), _convert_to_signed_asymm(), - _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), _b_offset(0), - _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false), + : _memory_group(memory_manager), _weights_manager(weights_manager), _asm_glue(std::make_unique<NEGEMMAssemblyDispatch>(memory_manager, weights_manager)), _mm_kernel(), _mtx_a_reshape_kernel(), + _mtx_b_reshape_kernel(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _activation_func(), + _convert_to_signed_asymm(), _convert_from_signed_asymm(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), + _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false), _run_activation(false), _flip_signedness(false) { } @@ -145,14 +146,14 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, { if(is_data_type_quantized_asymmetric(a_to_use->info()->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { - _asm_glue.configure(a_to_use, b, c, output, asm_info); - _fused_assembly_path = _asm_glue.is_configured(); + _asm_glue->configure(a_to_use, b, c, output, asm_info); + _fused_assembly_path = _asm_glue->is_configured(); } else { - _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, asm_info); + _asm_glue->configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, asm_info); } - _assembly_path = _asm_glue.is_configured(); + _assembly_path = _asm_glue->is_configured(); break; } default: @@ -510,9 +511,9 @@ void NEGEMMLowpMatrixMultiplyCore::run() } // Run GEMM - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { - _asm_glue.run(); + _asm_glue->run(); } else { @@ -575,21 +576,21 @@ void NEGEMMLowpMatrixMultiplyCore::prepare() { const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b); // Run assembly reshape - if(_asm_glue.is_configured()) + if(_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); } - _asm_glue.prepare(); + _asm_glue->prepare(); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); } } // Run non-assembly reshape - else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue.is_configured()) + else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured()) { if(!original_b_managed_by_weights_manager) { diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 265df9246f..bd3bdd6a26 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -28,13 +28,13 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" +#include "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "src/core/NEON/kernels/convolution/common/utils.hpp" #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" |