From c9c62c2fa1c80ba7f11b0d0732740460dfa00e74 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Fri, 6 Apr 2018 10:00:10 +0100
Subject: COMPMID-1056 - Optimizing CLGEMMMatrixMultiplyKernel refactoring the inner loop

Results reported at: https://confluence.arm.com/display/MLENG/GEMM+FP32+performance%3A+ACL+18.05

Change-Id: I3246c4f19c4d21a7d6a44e4593bc5caffc016f81
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127838
Tested-by: Jenkins
Reviewed-by: Georgios Pinitas
Reviewed-by: Anthony Barbier
---
 arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h')

diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 584266b824..67c0467f3a 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -27,11 +27,11 @@
 #include "arm_compute/runtime/CL/ICLSimpleFunction.h"
 
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
 #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
 #include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
 
@@ -113,12 +113,12 @@ public:
 private:
     void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
     void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool is_interleaved_transposed = true);
+    void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
 
     CLMemoryGroup _memory_group;
     CLIm2ColKernel _im2col_kernel;
     CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
-    CLGEMMMatrixMultiplyKernel _mm_kernel;
+    CLGEMM _mm_gemm;
     CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
     CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
     CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
-- 
cgit v1.2.1