aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/graph/Graph.h
diff options
context:
space:
mode:
authorGian Marco <gianmarco.iodice@arm.com>2018-01-12 10:21:40 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:44:21 +0000
commit36a0a4608bf413fc1fd65eb335bfb736ef602149 (patch)
tree2ff0e35dc9e16fedd601b1f24bdc13d25d075b90 /arm_compute/graph/Graph.h
parent46edf63bd630f5e3f3eb31b7d4602caa317da075 (diff)
downloadComputeLibrary-36a0a4608bf413fc1fd65eb335bfb736ef602149.tar.gz
COMPMID-748 - Integrating optimized SGEMM for bifrost
This patch introduces a new GEMM capable of improving the MAC utilisation by 10% compared to the GEMM without reshape. However, this implementation is not faster in all cases, as we need to take into account the time for reshaping the matrices. For this reason, a heuristic solution to select the optimal GEMM to use has been added to the function. More information about the heuristic implementation can be found at COMPMID-852. With this new patch, GoogleNet, MobileNet, VGG16 and SqueezeNet can improve their performance by 1.5x. More information about the performance uplift can be found here: https://confluence.arm.com/display/MLENG/GEMM+FP32+performance%3A+ACL+18.02 Change-Id: I024563c06b9aed02a211a974e452bae5c233b04c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117140 Reviewed-by: Pablo Tello <pablo.tello@arm.com> Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'arm_compute/graph/Graph.h')
-rw-r--r--arm_compute/graph/Graph.h8
1 files changed, 6 insertions, 2 deletions
diff --git a/arm_compute/graph/Graph.h b/arm_compute/graph/Graph.h
index ab1d8b8866..853b90df82 100644
--- a/arm_compute/graph/Graph.h
+++ b/arm_compute/graph/Graph.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef __ARM_COMPUTE_GRAPH_GRAPH_H__
#define __ARM_COMPUTE_GRAPH_GRAPH_H__
+#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/graph/INode.h"
#include "arm_compute/graph/ITensorObject.h"
#include "arm_compute/graph/SubTensor.h"
@@ -67,9 +68,12 @@ public:
* @param[in] tensor Tensor to add
*/
void add_tensor_object(std::unique_ptr<ITensorObject> tensor);
- /** Finalizes the current node's configuration
+ /** Check if the OpenCL target is available
*/
static bool opencl_is_available();
+ /** Returns the GPU target
+ */
+ static GPUTarget gpu_target();
/** Manually sets the output of the current node
*
* @param[in] tmp Output info to set