diff options
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEGEMM.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMM.h | 55 |
1 files changed, 8 insertions, 47 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 6c5be0eb5e..c1ae11bcbf 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -29,40 +29,26 @@ #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/Tensor.h" -#include "src/core/helpers/MemoryHelpers.h" #include <memory> namespace arm_compute { -// Forward declarations -class NEGEMMInterleave4x4Kernel; -class NEGEMMMatrixAdditionKernel; -class NEGEMMMatrixMultiplyKernel; -class NEGEMMTranspose1xWKernel; -namespace cpu -{ -class CpuGemmAssemblyDispatch; -} - /** Basic function to execute GEMM. This function calls the following kernels: * * If optimized assembly is available: * -# @ref cpu::CpuGemmAssemblyDispatch - * -# @ref NEActivationLayer (if alpha != 1.0) + * -# @ref cpu::CpuActivation (if alpha != 1.0) * Else: - * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) - * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref cpu::kernels::CpuGemmInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref cpu::kernels::CpuGemmTranspose1xWKernel (if the output tensor is a matrix) * -# @ref NEGEMMMatrixMultiplyKernel * In both cases: - * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once) + * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once) * Else: - * -# @ref NEArithmeticAddition (if c != nullptr and is reshaped once and not optimized assembly in place) + * -# @ref cpu::CpuAdd (if c != nullptr and is reshaped once and not optimized assembly in place) * - * -# @ref NEActivationLayer (if activation is specified in GEMMInfo) + * -# @ref cpu::CpuActivation (if activation is specified in GEMMInfo) */ class NEGEMM : public IFunction { @@ -117,33 +103,8 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; - std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue; - std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; - - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _tmp_d; - const ITensor *_original_b; - bool _run_vector_matrix_multiplication; - bool _run_alpha_scale; - bool _run_addition; - bool _run_bias_addition; - bool _run_activation; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - - ITensorPack _asm_glue_run_pack; - ITensorPack _asm_glue_prep_pack; - WorkspaceData<Tensor> _asm_glue_workspace; - experimental::MemoryRequirements _aux_mem_req; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMM_H */ |