diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2021-06-21 12:00:43 +0100 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2021-06-29 13:29:01 +0000 |
commit | 93b75e0c072c3cc5654fcdf6aed1068b40012081 (patch) | |
tree | 08acbf1bcafaa326bea1d8e472ad66b955c7c17f /arm_compute/runtime/NEON/functions/NEGEMM.h | |
parent | 5fdde99f4271891a40c02cd1e89f1344aa84583a (diff) | |
download | ComputeLibrary-93b75e0c072c3cc5654fcdf6aed1068b40012081.tar.gz |
Port NEGEMM to memory injecting interface (Part 1)
- Start porting NEGEMM to the new API
- Port NEGEMMInterleave4x4Kernel to the new API
- Port NEGEMMMatrixAdditionKernel to the new API
- Port NEGEMMTranspose1xWKernel to the new API
- Remove padding from NEGEMMMatrixAdditionKernel
- Remove unused INESimpleKernel and ICPPSimpleKernel
Partially resolves: COMPMID-4402
Change-Id: I63edadddfe00a54586e5384d6a0211db25ae9042
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5857
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEGEMM.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMM.h | 55 |
1 files changed, 8 insertions, 47 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 6c5be0eb5e..c1ae11bcbf 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -29,40 +29,26 @@ #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/Tensor.h" -#include "src/core/helpers/MemoryHelpers.h" #include <memory> namespace arm_compute { -// Forward declarations -class NEGEMMInterleave4x4Kernel; -class NEGEMMMatrixAdditionKernel; -class NEGEMMMatrixMultiplyKernel; -class NEGEMMTranspose1xWKernel; -namespace cpu -{ -class CpuGemmAssemblyDispatch; -} - /** Basic function to execute GEMM. This function calls the following kernels: * * If optimized assembly is available: * -# @ref cpu::CpuGemmAssemblyDispatch - * -# @ref NEActivationLayer (if alpha != 1.0) + * -# @ref cpu::CpuActivation (if alpha != 1.0) * Else: - * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) - * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref cpu::kernels::CpuGemmInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref cpu::kernels::CpuGemmTranspose1xWKernel (if the output tensor is a matrix) * -# @ref NEGEMMMatrixMultiplyKernel * In both cases: - * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once) + * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once) * Else: - * -# @ref NEArithmeticAddition (if c != nullptr and is reshaped once and not optimized assembly in place) + * -# @ref cpu::CpuAdd (if c != nullptr and is reshaped once and not optimized assembly in place) * - * -# @ref NEActivationLayer (if activation is specified in GEMMInfo) + * -# @ref cpu::CpuActivation (if activation is specified in GEMMInfo) */ class NEGEMM : public IFunction { @@ -117,33 +103,8 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; - std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<cpu::CpuGemmAssemblyDispatch> _asm_glue; - std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; - - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _tmp_d; - const ITensor *_original_b; - bool _run_vector_matrix_multiplication; - bool _run_alpha_scale; - bool _run_addition; - bool _run_bias_addition; - bool _run_activation; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - - ITensorPack _asm_glue_run_pack; - ITensorPack _asm_glue_prep_pack; - WorkspaceData<Tensor> _asm_glue_workspace; - experimental::MemoryRequirements _aux_mem_req; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMM_H */ |