about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-04-06 10:00:10 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:37 +0000
commit c9c62c2fa1c80ba7f11b0d0732740460dfa00e74 (patch)
tree 260052aa5c7172e2afc8517ae13adb75504ee62e /arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
parent 3ab6804a0e30be4d8591c8c84ae6a73940d0f2e2 (diff)
download ComputeLibrary-c9c62c2fa1c80ba7f11b0d0732740460dfa00e74.tar.gz
COMPMID-1056 - Optimizing CLGEMMMatrixMultiplyKernel by refactoring the inner loop
Results reported at: https://confluence.arm.com/display/MLENG/GEMM+FP32+performance%3A+ACL+18.05

Change-Id: I3246c4f19c4d21a7d6a44e4593bc5caffc016f81
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127838
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h')
-rw-r--r-- arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h | 6
1 files changed, 3 insertions, 3 deletions
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 584266b824..67c0467f3a 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -27,11 +27,11 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
@@ -113,12 +113,12 @@ public:
private:
void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
- void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool is_interleaved_transposed = true);
+ void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
CLMemoryGroup _memory_group;
CLIm2ColKernel _im2col_kernel;
CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLGEMM _mm_gemm;
CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;