From 2e5fd637205770ec5e11096e6e19b8efc67d544e Mon Sep 17 00:00:00 2001 From: SiCongLi Date: Mon, 2 Mar 2020 15:39:15 +0000 Subject: COMPMID-3098 Fuse Relu and Bounded Relu with FullyConnected NEON Change-Id: Id28062445590d6c06b35f7d7434eb38393ae94a7 Signed-off-by: SiCongLi Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2875 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 8 ++++---- .../runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arm_compute/runtime/NEON') diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index db09da45ee..b14650c0e9 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -162,9 +162,9 @@ public: void prepare() override; private: - void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output); - void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output); - void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output); + void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); + void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); + void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); MemoryGroup _memory_group; IWeightsManager *_weights_manager; @@ -182,7 +182,7 @@ private: bool _are_weights_converted; bool _are_weights_reshaped; bool _is_fc_after_conv; - bool _is_quantized; + bool _is_quantized_asymmetric; bool _is_prepared; }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 508159eb77..74dedcf4c5 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -28,9 +28,12 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" #include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -60,7 +63,7 @@ class NEGEMMLowpMatrixMultiplyCore : public IFunction { public: /** Constructor */ - NEGEMMLowpMatrixMultiplyCore(std::shared_ptr memory_manager = nullptr); + NEGEMMLowpMatrixMultiplyCore(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete; /** Default move constructor */ @@ -109,10 +112,11 @@ public: private: MemoryGroup _memory_group; + IWeightsManager *_weights_manager; NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr _mm_kernel; - std::unique_ptr _mtx_a_reshape_kernel; - std::unique_ptr _mtx_b_reshape_kernel; + NEGEMMLowpMatrixMultiplyKernel _mm_kernel; + NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel; + NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel; NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; -- cgit v1.2.1