diff options
author | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-05-12 13:59:10 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-05-14 04:26:10 +0000 |
commit | 4f7693d8757cf12c33f049c61c63bc689379ab84 (patch) | |
tree | f37317dd3ffe7394886b045c92e08e31863e6954 /arm_compute/runtime/NEON | |
parent | 13c497a8a9a4aa9353719afe53ccc7db50da74fe (diff) | |
download | ComputeLibrary-4f7693d8757cf12c33f049c61c63bc689379ab84.tar.gz |
Rename NEGEMMAssembly to CpuGemmAssembly
- Dispatch and WrapperKernel have been renamed and moved
- Header files for assembly kernels have been moved
Partially Resolves: COMPMID-4506
Change-Id: I6c2f391bb95ba1ce7ca195d0efa57b9c3225570f
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5637
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON')
4 files changed, 33 insertions, 24 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index d4a9f68beb..9df2e08956 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -41,12 +41,15 @@ class NEGEMMInterleave4x4Kernel; class NEGEMMMatrixAdditionKernel; class NEGEMMMatrixMultiplyKernel; class NEGEMMTranspose1xWKernel; -class NEGEMMAssemblyDispatch; +namespace cpu +{ +class CpuGemmAssemblyDispatch; +} /** Basic function to execute GEMM. This function calls the following kernels: * * If optimized assembly is available: - * -# @ref NEGEMMAssemblyDispatch + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref NEActivationLayer (if alpha != 1.0) * Else: * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) @@ -119,16 +122,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; - std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue; - std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; + std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; + std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; + std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue; + std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; + NEActivationLayer _alpha_scale_func; + NEArithmeticAddition _add_bias; + NEActivationLayer _activation_func; Tensor _tmp_a; Tensor _tmp_b; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h index b2ffd038de..6c71f0e188 100644 --- 
a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -36,13 +36,16 @@ namespace arm_compute { // Forward declarations class ITensor; -class NEGEMMAssemblyDispatch; +namespace cpu +{ +class CpuGemmAssemblyDispatch; +} /** Basic function to compute the convolution layer. This function calls the following kernels/functions: * * Supports only NHWC data layout * - * -# @ref NEGEMMAssemblyDispatch + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch * * Weights are transformed from OHWI to HWIO format using the following kernels: @@ -111,13 +114,13 @@ public: void prepare() override; private: - std::unique_ptr<NEGEMMAssemblyDispatch> _gemm_asm_func; - NEActivationLayer _activation_func; - NEPermute _weights_permute_func; - const ITensor *_original_weights; - Tensor _permuted_weights; - bool _is_prepared; - bool _run_activation; + std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _gemm_asm_func; + NEActivationLayer _activation_func; + NEPermute _weights_permute_func; + const ITensor *_original_weights; + Tensor _permuted_weights; + bool _is_prepared; + bool _run_activation; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEGEMMCONV2D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 780723e752..a292712bd7 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -44,7 +44,10 @@ class NEGEMMLowpOffsetContributionOutputStageKernel; class NEGEMMLowpMatrixAReductionKernel; class NEGEMMLowpMatrixBReductionKernel; class NEGEMMTranspose1xWKernel; -class NEGEMMAssemblyDispatch; +namespace cpu +{ +class CpuGemmAssemblyDispatch; +} /** Basic function to execute GEMMLowpMatrixMultiplyCore. 
This function calls the following kernels if the DOT product instruction is not available: * @@ -135,7 +138,7 @@ public: private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue; + std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue; std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel; std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel; std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel; diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 77f9093ed4..f9ebf608cb 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -47,7 +47,7 @@ class ICPPKernel; * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) * -# @ref NEWinogradLayerTransformInputKernel * -# @ref NEWinogradLayerTransformOutputKernel - * -# @ref NEGEMMAssemblyDispatch + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref CPPPermute (three times: weights, input and output) * * @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true |