From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Oct 2020 00:04:14 +0100 Subject: COMPMID-3638: Move NEON kernels Signed-off-by: Michalis Spyrou Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- src/runtime/NEON/functions/NELSTMLayer.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'src/runtime/NEON/functions/NELSTMLayer.cpp') diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index dca274acd2..48d69bd6fc 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -29,12 +29,24 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/common/LSTMParams.h" +#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" +#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" +#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" namespace arm_compute { using namespace arm_compute::misc::shape_calculator; using namespace arm_compute::utils::info_helpers; +NELSTMLayer::~NELSTMLayer() = default; + NELSTMLayer::NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), 
_accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(), _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(), @@ -575,8 +587,8 @@ Status NELSTMLayer::validate(const ITensorInfo *input, } // Validate copy kernel - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&cell_state_tmp, cell_state_out)); - ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(&cell_state_tmp, cell_state_out)); + ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output)); // Validate scratch concatenation std::vector inputs_vector_info_raw; @@ -646,7 +658,7 @@ void NELSTMLayer::run() } _fully_connected_cell_state.run(); - NEScheduler::get().schedule(&_transpose_cell_state, Window::DimY); + _transpose_cell_state.run(); _gemm_cell_state1.run(); _accum_cell_state1.run(); if(_is_layer_norm_lstm) @@ -691,8 +703,8 @@ void NELSTMLayer::run() } } - NEScheduler::get().schedule(&_copy_cell_state, Window::DimY); - NEScheduler::get().schedule(&_copy_output, Window::DimY); + _copy_cell_state.run(); + _copy_output.run(); _concat_scratch_buffer.run(); } -- cgit v1.2.1