From 9c67d3883adaa8a65e3567579cf99937deaab21a Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Fri, 20 Aug 2021 15:24:03 +0100 Subject: Remove map/unmap overhead for input/output accessor when using DummyAccessor Don't map/unmap when the tensor data is not accessed in the input or output accessor. This is so to avoid measuring CPU overhead when benchmarking on the GPU backend. Resolve COMPMID-4712 Change-Id: I5baba1b93e7a51fe13525bcce6c0cfdecb14493e Signed-off-by: Giorgio Arena Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6140 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- arm_compute/graph/IDeviceBackend.h | 4 +++- arm_compute/graph/ITensorAccessor.h | 10 +++++++++- arm_compute/graph/Utils.h | 2 ++ arm_compute/graph/backends/CL/CLDeviceBackend.h | 3 ++- arm_compute/graph/backends/NEON/NEDeviceBackend.h | 1 + 5 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arm_compute/graph') diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h index 3a0a7e634e..f84aac0ae0 100644 --- a/arm_compute/graph/IDeviceBackend.h +++ b/arm_compute/graph/IDeviceBackend.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -118,6 +118,8 @@ public: * @return Weights manager */ virtual std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() = 0; + /** Synchronize kernels execution on the backend. On GPU, this results in a blocking call waiting for all kernels to be completed. */ + virtual void sync() = 0; }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/ITensorAccessor.h b/arm_compute/graph/ITensorAccessor.h index 96bd499573..a8818be2e5 100644 --- a/arm_compute/graph/ITensorAccessor.h +++ b/arm_compute/graph/ITensorAccessor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019,2021 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -45,6 +45,14 @@ public: * @return True if access is successful else false */ virtual bool access_tensor(ITensor &tensor) = 0; + /** Returns true if the tensor data is being accessed + * + * @return True if the tensor data is being accessed by the accessor. False otherwise + */ + virtual bool access_tensor_data() + { + return true; + } }; using ITensorAccessorUPtr = std::unique_ptr<ITensorAccessor>; diff --git a/arm_compute/graph/Utils.h b/arm_compute/graph/Utils.h index 6c3b76839e..43a8eca9e3 100644 --- a/arm_compute/graph/Utils.h +++ b/arm_compute/graph/Utils.h @@ -107,6 +107,8 @@ void setup_requested_backend_context(GraphContext &ctx, Target target); * @param[in,out] ctx Graph Context */ void release_default_graph_context(GraphContext &ctx); +/** Synchronize kernels execution on the backends. On GPU, this results in a blocking call waiting for all kernels to be completed. */ +void sync_backends(); /** Get size of a tensor's given dimension depending on its layout * * @param[in] descriptor Descriptor diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index 278a8e5031..63674ad794 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -28,8 +28,8 @@ #include "arm_compute/runtime/CL/CLBufferAllocator.h" #include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h" -#include "arm_compute/runtime/CL/CLTypes.h" #include "arm_compute/runtime/CL/CLTuner.h" +#include "arm_compute/runtime/CL/CLTypes.h" namespace arm_compute { @@ -70,6 +70,7 @@ public: Status validate_node(INode &node) override; std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override; std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override; + void sync() override; private: int _context_count; /**< Counts how many contexts are currently using the backend */ diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h
b/arm_compute/graph/backends/NEON/NEDeviceBackend.h index 0b343c0d67..9cb37d4553 100644 --- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h +++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h @@ -52,6 +52,7 @@ public: Status validate_node(INode &node) override; std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override; std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override; + void sync() override; private: Allocator _allocator; /**< Backend allocator */ -- cgit v1.2.1