diff options
Diffstat (limited to 'arm_compute/runtime/NEON/functions')
4 files changed, 25 insertions, 26 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index f2b6ef77bd..5279995be4 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -25,7 +25,6 @@ #define __ARM_COMPUTE_NEGEMM_H__ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" @@ -35,6 +34,8 @@ #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/NEON/AssemblyHelper.h" + #include <memory> namespace arm_compute @@ -73,19 +74,19 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEGEMMInterleave4x4Kernel _interleave_kernel; - NEGEMMTranspose1xWKernel _transpose_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - std::unique_ptr<NEGEMMAssemblyBaseKernel> _mm_optimised_kernel; - NEGEMMMatrixAdditionKernel _ma_kernel; - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _workspace; - bool _run_vector_matrix_multiplication; - bool _run_addition; - bool _is_first_run; - bool _reshape_b_only_on_first_run; + MemoryGroup _memory_group; + NEGEMMInterleave4x4Kernel _interleave_kernel; + NEGEMMTranspose1xWKernel _transpose_kernel; + NEGEMMMatrixMultiplyKernel _mm_kernel; + AssemblyKernelGlueF32 _asm_glue; + NEGEMMMatrixAdditionKernel _ma_kernel; + Tensor _tmp_a; + Tensor _tmp_b; + Tensor _workspace; + bool _run_vector_matrix_multiplication; + bool _run_addition; + bool _is_first_run; + bool _reshape_b_only_on_first_run; }; } #endif /*__ARM_COMPUTE_NEGEMM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index ac5f4caa78..4ae8ee1fb3 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -36,6 +36,7 @@ #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/AssemblyHelper.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/Tensor.h" @@ -149,22 +150,14 @@ private: * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped */ void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); - /** Prepare the appropriate assembly optimized kernel - * - * @param[in] ci CPU information - * @param[in] M M parameter of matrix multiplication - * @param[in] N N parameter of matrix multiplication - * @param[in] K K parameter of matrix multiplication - */ - void configure_asm_mm(const struct CPUInfo &ci, int M, int N, int K); private: + AssemblyKernelGlueF32 _asm_glue; MemoryGroup _memory_group; NEIm2ColKernel _input_im2col_kernel; NEGEMMInterleave4x4Kernel _input_interleave_kernel; NEConvolutionLayerReshapeWeights _reshape_weights; NEGEMMMatrixMultiplyKernel _mm_kernel; - std::unique_ptr<NEGEMMAssemblyBaseKernel> _mm_optimised_kernel; NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; NECol2ImKernel _output_col2im_kernel; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h index 3d213a7668..f09c94e726 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/AssemblyHelper.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -58,6 +59,8 @@ public: private: MemoryGroup _memory_group; + AssemblyKernelGlueU8U32 _asm_glue_unsigned; + AssemblyKernelGlueS8S32 _asm_glue_signed; std::unique_ptr<INEKernel> _mm_kernel; std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index eddb3a26b7..95776f829a 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/AssemblyHelper.h" #include "arm_compute/runtime/Tensor.h" #include <memory> @@ -48,7 +49,6 @@ class ITensor; * otherwise if the DOT product instruction is available: * * -# @ref NEGEMMInterleaveBlockedKernel - * -# @ref NEGEMMLowpAArch64V8P4Kernel * -# @ref NEGEMMLowpOffsetContributionKernel * */ @@ -90,6 +90,8 @@ public: private: MemoryGroup _memory_group; + AssemblyKernelGlueU8U32 _asm_glue_unsigned; + AssemblyKernelGlueS8S32 _asm_glue_signed; std::unique_ptr<INEKernel> _mm_kernel; std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; |