diff options
Diffstat (limited to 'arm_compute')
4 files changed, 33 insertions, 24 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index d4a9f68beb..9df2e08956 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -41,12 +41,15 @@ class NEGEMMInterleave4x4Kernel; class NEGEMMMatrixAdditionKernel; class NEGEMMMatrixMultiplyKernel; class NEGEMMTranspose1xWKernel; -class NEGEMMAssemblyDispatch; +namespace cpu +{ +class CpuGemmAssemblyDispatch; +} /** Basic function to execute GEMM. This function calls the following kernels: * * If optimized assembly is available: - * -# @ref NEGEMMAssemblyDispatch + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref NEActivationLayer (if alpha != 1.0) * Else: * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) @@ -119,16 +122,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; - std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue; - std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; + std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; + std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; + std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue; + std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; + NEActivationLayer _alpha_scale_func; + NEArithmeticAddition _add_bias; + NEActivationLayer _activation_func; Tensor _tmp_a; Tensor _tmp_b; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h index b2ffd038de..6c71f0e188 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h @@ -36,13 +36,16 @@ namespace arm_compute { // Forward declarations class ITensor; -class NEGEMMAssemblyDispatch; +namespace cpu +{ +class CpuGemmAssemblyDispatch; +} /** Basic function to compute the convolution layer. This function calls the following kernels/functions: * * Supports only NHWC data layout * - * -# @ref NEGEMMAssemblyDispatch + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref NEActivationLayer, in case activation cannot be fused in the assembly dispatch * * Weights are transformed from OHWI to HWIO format using the following kernels: @@ -111,13 +114,13 @@ public: void prepare() override; private: - std::unique_ptr<NEGEMMAssemblyDispatch> _gemm_asm_func; - NEActivationLayer _activation_func; - NEPermute _weights_permute_func; - const ITensor *_original_weights; - Tensor _permuted_weights; - bool _is_prepared; - bool _run_activation; + std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _gemm_asm_func; + NEActivationLayer _activation_func; + NEPermute _weights_permute_func; + const ITensor *_original_weights; + Tensor _permuted_weights; + bool _is_prepared; + bool _run_activation; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEGEMMCONV2D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 780723e752..a292712bd7 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -44,7 +44,10 @@ class NEGEMMLowpOffsetContributionOutputStageKernel; class NEGEMMLowpMatrixAReductionKernel; class NEGEMMLowpMatrixBReductionKernel; class NEGEMMTranspose1xWKernel; -class NEGEMMAssemblyDispatch; +namespace cpu +{ +class CpuGemmAssemblyDispatch; +} /** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available: * @@ -135,7 +138,7 @@ public: private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMAssemblyDispatch> _asm_glue; + std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue; std::unique_ptr<NEGEMMLowpMatrixMultiplyKernel> _mm_kernel; std::unique_ptr<NEGEMMInterleave4x4Kernel> _mtx_a_reshape_kernel; std::unique_ptr<NEGEMMTranspose1xWKernel> _mtx_b_reshape_kernel; diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 77f9093ed4..f9ebf608cb 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -47,7 +47,7 @@ class ICPPKernel; * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) * -# @ref NEWinogradLayerTransformInputKernel * -# @ref NEWinogradLayerTransformOutputKernel - * -# @ref NEGEMMAssemblyDispatch + * -# @ref cpu::CpuGemmAssemblyDispatch * -# @ref CPPPermute (three times: weights, input and output) * * @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true |