From 36a0a4608bf413fc1fd65eb335bfb736ef602149 Mon Sep 17 00:00:00 2001 From: Gian Marco Date: Fri, 12 Jan 2018 10:21:40 +0000 Subject: COMPMID-748 - Integrating optimized SGEMM for bifrost This patch introduces a new GEMM capable to improve the mac utilisation of 10% compared to the GEMM without reshape. However this implementation is not faster in all cases as we need to take into account the time for reshaping the matrices. For this reason an heuristic solution to select the optimal GEMM to use has been added to the function. More information about the heuristic implementation can be found at COMPMID-852. With this new patch, GoogleNet, MobileNet, VGG16 and SqueezeNet can improved the performance of 1.5x. More information about the performance uplift can be found here: https://confluence.arm.com/display/MLENG/GEMM+FP32+performance%3A+ACL+18.02 Change-Id: I024563c06b9aed02a211a974e452bae5c233b04c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117140 Reviewed-by: Pablo Tello Tested-by: Jenkins Reviewed-by: Anthony Barbier --- arm_compute/runtime/CL/functions/CLGEMM.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arm_compute/runtime/CL/functions/CLGEMM.h') diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index bf41226bda..0f144915d7 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,7 +68,8 @@ public: * @param[in] alpha Weight of the matrix product * @param[in] beta Weight of matrix C * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run + * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping + * in case matrix A and matrix B have been already transformed. */ void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); -- cgit v1.2.1