From d7316eb877cc4ff8573219374335e917b19a0203 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Wed, 16 Jun 2021 11:14:41 +0100
Subject: Port NEGEMMConv2d to memory injecting interface

Resolves: COMPMID-4506, COMPMID-4570

Change-Id: I6d37a06da141f1fcfcaa8525322a319cb0234791
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5824
Reviewed-by: Georgios Pinitas
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
---
 arm_compute/runtime/NEON/functions/NEGEMM.h        | 15 +++---
 arm_compute/runtime/NEON/functions/NEGEMMConv2d.h  |  5 +-
 .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.h  | 63 ++--------------------
 .../NEON/functions/NEWinogradConvolutionLayer.h    | 15 +----
 4 files changed, 12 insertions(+), 86 deletions(-)

(limited to 'arm_compute/runtime/NEON')
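For readers unfamiliar with the interface named in the subject line: "memory injecting" refers to Compute Library's move toward stateless operators. A function no longer owns its scratch tensors; the underlying operator is configured once from tensor metadata, publishes how much auxiliary memory it needs, and then receives every tensor (inputs, outputs, and workspace) at run time through a tensor pack keyed by slot ids. The sketch below illustrates that contract in self-contained C++; all names in it (TensorPack, MemoryRequirement, ToyGemmOperator) are hypothetical stand-ins for illustration, not Compute Library API.

// Minimal sketch of the memory-injecting operator contract.
// Hypothetical names throughout; this is not the Compute Library API.
#include <cstddef>
#include <map>
#include <vector>

enum Slot { SRC_0, SRC_1, DST, WORKSPACE_0 }; // pack slot ids

// Maps a slot id to a caller-owned buffer; the operator owns nothing.
struct TensorPack
{
    std::map<int, void *> tensors;
    void *get(int slot) const { return tensors.at(slot); }
};

// One auxiliary buffer the operator asks its caller to provide.
struct MemoryRequirement
{
    int         slot; // where the caller must inject it in the pack
    std::size_t size; // bytes, known once configure() has run
};

class ToyGemmOperator
{
public:
    // Configure from shapes/metadata only and record the scratch needed.
    void configure(std::size_t m, std::size_t n, std::size_t k)
    {
        m_ = m; n_ = n; k_ = k;
        aux_ = { { WORKSPACE_0, m * k * sizeof(float) } }; // e.g. a packed copy of A
    }
    const std::vector<MemoryRequirement> &workspace() const { return aux_; }

    // Every tensor, including scratch, arrives through the injected pack.
    void run(const TensorPack &pack)
    {
        const float *a = static_cast<const float *>(pack.get(SRC_0));
        const float *b = static_cast<const float *>(pack.get(SRC_1));
        float       *d = static_cast<float *>(pack.get(DST));
        float *scratch = static_cast<float *>(pack.get(WORKSPACE_0));
        (void)scratch; // a real kernel would pack A into the workspace here
        for (std::size_t i = 0; i < m_; ++i)
            for (std::size_t j = 0; j < n_; ++j)
            {
                float acc = 0.f;
                for (std::size_t p = 0; p < k_; ++p)
                    acc += a[i * k_ + p] * b[p * n_ + j];
                d[i * n_ + j] = acc;
            }
    }

private:
    std::size_t m_{}, n_{}, k_{};
    std::vector<MemoryRequirement> aux_{};
};

The members added to NEGEMM in the first hunk below are the caller's half of this contract: _asm_glue_run_pack and _asm_glue_prep_pack carry the tensors injected into run() and prepare() respectively, _asm_glue_workspace owns the buffers allocated to satisfy the requirements recorded in _aux_mem_req, and the newly included MemoryHelpers.h provides the WorkspaceData type.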
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 6fa30bd545..6c5be0eb5e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -32,6 +32,7 @@
 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
 #include "arm_compute/runtime/Tensor.h"
+#include "src/core/helpers/MemoryHelpers.h"
 
 #include <memory>
 
@@ -105,14 +106,7 @@ public:
     void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMM.
      *
-     * @param[in]  a         First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32
-     * @param[in]  b         Second input tensor info (Matrix B). Data type supported: same as @p a.
-     * @param[in]  c         Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
-     * @param[out] output    Output tensor info. Data type supported: same as @p a
-     * @param[in]  alpha     Weight of the matrix product
-     * @param[in]  beta      Weight of matrix C
-     * @param[in]  gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
-     *                       if the reshape of matrix B should happen only for the first run
+     * Similar to @ref NEGEMM::configure()
      *
      * @return a status
      */
@@ -146,7 +140,10 @@ private:
     bool _reshape_b_only_on_first_run;
     bool _is_prepared;
 
-    ITensorPack _asm_glue_tensors{};
+    ITensorPack                      _asm_glue_run_pack;
+    ITensorPack                      _asm_glue_prep_pack;
+    WorkspaceData<Tensor>            _asm_glue_workspace;
+    experimental::MemoryRequirements _aux_mem_req;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NEGEMM_H */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index f39ce4dfa3..53ceb6d978 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -29,15 +29,12 @@
 #include "arm_compute/runtime/IMemoryManager.h"
 
 #include <memory>
+
 namespace arm_compute
 {
 // Forward declarations
 class ITensor;
 class ITensorInfo;
-namespace cpu
-{
-class CpuGemmAssemblyDispatch;
-}
 
 /** Basic function to compute the convolution layer. This function calls the following kernels/functions:
  *
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index dc9783f9eb..ff888760e1 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -24,32 +24,15 @@
 #ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
 #define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
 
-#include "NEActivationLayer.h"
-#include "arm_compute/core/ITensorPack.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/IWeightsManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
 
 #include <memory>
 
 namespace arm_compute
 {
 class ITensor;
-class NEConvertQuantizedSignednessKernel;
-class NEGEMMInterleave4x4Kernel;
-class NEGEMMLowpMatrixMultiplyKernel;
-class NEGEMMLowpOffsetContributionKernel;
-class NEGEMMLowpOffsetContributionOutputStageKernel;
-class NEGEMMLowpMatrixAReductionKernel;
-class NEGEMMLowpMatrixBReductionKernel;
-class NEGEMMTranspose1xWKernel;
-namespace cpu
-{
-class CpuGemmAssemblyDispatch;
-}
-
 /** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available:
  *
  * -# @ref NEGEMMInterleave4x4Kernel
@@ -119,14 +102,7 @@ public:
     void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
      *
-     * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
-     *
-     * @param[in]  a         First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
-     * @param[in]  b         Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
-     * @param[in]  c         Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32
-     * @param[in]  output    Output tensor info. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
-     * @param[in]  gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
-     *                       if the reshape of matrix B should be executed only for the first run
+     * Similar to @ref NEGEMMLowpMatrixMultiplyCore::configure()
      *
      * @return a status
      */
@@ -137,41 +113,8 @@ public:
     void prepare() override;
 
 private:
-    MemoryGroup      _memory_group;
-    IWeightsManager *_weights_manager;
-    std::unique_ptr<cpu::CpuGemmAssemblyDispatch>                  _asm_glue;
-    std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel>                _mm_kernel;
-    std::unique_ptr<NEGEMMInterleave4x4Kernel>                     _mtx_a_reshape_kernel;
-    std::unique_ptr<NEGEMMTranspose1xWKernel>                      _mtx_b_reshape_kernel;
-    std::unique_ptr<NEGEMMLowpMatrixAReductionKernel>              _mtx_a_reduction_kernel;
-    std::unique_ptr<NEGEMMLowpMatrixBReductionKernel>              _mtx_b_reduction_kernel;
-    std::unique_ptr<NEGEMMLowpOffsetContributionKernel>            _offset_contribution_kernel;
-    std::unique_ptr<NEGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
-    NEActivationLayer                                              _activation_func;
-    std::unique_ptr<NEConvertQuantizedSignednessKernel>            _convert_to_signed_asymm;
-    std::unique_ptr<NEConvertQuantizedSignednessKernel>            _convert_from_signed_asymm;
-
-    Tensor         _vector_sum_col;
-    Tensor         _vector_sum_row;
-    Tensor         _tmp_a;
-    Tensor         _tmp_b;
-    Tensor         _mm_result_s32;
-    Tensor         _signed_a;
-    Tensor         _signed_output;
-    const ITensor *_original_b;
-    int32_t        _a_offset;
-    int32_t        _b_offset;
-
-    bool _run_vector_matrix_multiplication;
-    bool _assembly_path;
-    bool _fused_assembly_path;
-    bool _reshape_b_only_on_first_run;
-    bool _is_prepared;
-    bool _fuse_output_stage;
-    bool _run_activation;
-    bool _flip_signedness;
-
-    ITensorPack _asm_glue_tensors{};
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */
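The NEGEMMLowpMatrixMultiplyCore hunks above also collapse roughly thirty private members into the pimpl idiom: the header now forward-declares struct Impl and keeps a single std::unique_ptr<Impl>, so kernel and tensor types no longer leak out of the public include and internal refactors stop forcing rebuilds of user code. One C++ subtlety this pattern depends on: a std::unique_ptr to an incomplete type is a valid member, but the enclosing class's constructor and destructor must then be defined out of line, in the translation unit where Impl is complete. A minimal sketch with hypothetical names (Foo is not a Compute Library class):

// Minimal sketch of the pimpl idiom; Foo is a hypothetical class.
// --- Foo.h: no internal types appear in the public interface ---
#include <memory>

class Foo
{
public:
    Foo();
    ~Foo(); // declared here, defined in Foo.cpp where Impl is complete
    void run();

private:
    struct Impl;                 // forward declaration only
    std::unique_ptr<Impl> _impl;
};

// --- Foo.cpp: the real members live here and can change freely ---
struct Foo::Impl
{
    int state{ 0 }; // stand-in for the kernels, tensors and flags above
};

Foo::Foo() : _impl(std::make_unique<Impl>()) {}
Foo::~Foo() = default; // Impl is a complete type here, so this compiles
void Foo::run() { ++_impl->state; }

This is why classes written in this style declare their destructor in the header and define it, often as = default, in the source file next to Impl.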
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index f9ebf608cb..b02c4ed5b7 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -96,20 +96,9 @@ public:
     void run() override;
     void prepare() override;
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
+    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradConvolutionLayer
      *
-     * @param[in] input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
-     *                             while every optional dimension from 4 and above represent a batch of inputs.
-     *                             Data types supported: F16/F32.
-     * @param[in] weights          Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
-     *                             Currently only 3x3 and 5x5 kernels are supported.
-     * @param[in] biases           Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
-     * @param[in] output           Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
-     *                             Data types supported: Same as @p input.
-     * @param[in] conv_info        Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
-     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
-     * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
-     *                             available which may introduce a drop of accuracy as well. Default is false
+     * Similar to @ref NEWinogradConvolutionLayer::configure()
      *
      * @return a status
      */
--
cgit v1.2.1