From 9c67d3883adaa8a65e3567579cf99937deaab21a Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Fri, 20 Aug 2021 15:24:03 +0100 Subject: Remove map/unmap overhead for input/output accessor when using DummyAccessor Don't map/unmap when the tensor data is not accessed in the input or output accessor. This avoids measuring CPU overhead when benchmarking on the GPU backend. Resolve COMPMID-4712 Change-Id: I5baba1b93e7a51fe13525bcce6c0cfdecb14493e Signed-off-by: Giorgio Arena Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6140 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- arm_compute/graph/IDeviceBackend.h | 4 +++- arm_compute/graph/ITensorAccessor.h | 10 +++++++++- arm_compute/graph/Utils.h | 2 ++ arm_compute/graph/backends/CL/CLDeviceBackend.h | 3 ++- arm_compute/graph/backends/NEON/NEDeviceBackend.h | 1 + src/graph/Tensor.cpp | 24 +++++++++++++++-------- src/graph/Utils.cpp | 11 +++++++++++ src/graph/backends/CL/CLDeviceBackend.cpp | 5 +++++ src/graph/backends/NEON/NEDeviceBackend.cpp | 5 +++++ src/graph/detail/ExecutionHelpers.cpp | 4 +++- utils/GraphUtils.cpp | 5 +++++ utils/GraphUtils.h | 1 + 12 files changed, 63 insertions(+), 12 deletions(-) diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h index 3a0a7e634e..f84aac0ae0 100644 --- a/arm_compute/graph/IDeviceBackend.h +++ b/arm_compute/graph/IDeviceBackend.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -118,6 +118,8 @@ public: * @return Weights manager */ virtual std::shared_ptr create_weights_manager() = 0; + /** Synchronize kernels execution on the backend. On GPU, this results in a blocking call waiting for all kernels to be completed. 
*/ + virtual void sync() = 0; }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/ITensorAccessor.h b/arm_compute/graph/ITensorAccessor.h index 96bd499573..a8818be2e5 100644 --- a/arm_compute/graph/ITensorAccessor.h +++ b/arm_compute/graph/ITensorAccessor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,14 @@ public: * @return True if access is successful else false */ virtual bool access_tensor(ITensor &tensor) = 0; + /** Returns true if the tensor data is being accessed + * + * @return True if the tensor data is being accessed by the accessor. False otherwise + */ + virtual bool access_tensor_data() + { + return true; + } }; using ITensorAccessorUPtr = std::unique_ptr; diff --git a/arm_compute/graph/Utils.h b/arm_compute/graph/Utils.h index 6c3b76839e..43a8eca9e3 100644 --- a/arm_compute/graph/Utils.h +++ b/arm_compute/graph/Utils.h @@ -107,6 +107,8 @@ void setup_requested_backend_context(GraphContext &ctx, Target target); * @param[in,out] ctx Graph Context */ void release_default_graph_context(GraphContext &ctx); +/** Synchronize kernels execution on the backends. On GPU, this results in a blocking call waiting for all kernels to be completed. 
*/ +void sync_backends(); /** Get size of a tensor's given dimension depending on its layout * * @param[in] descriptor Descriptor diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index 278a8e5031..63674ad794 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -28,8 +28,8 @@ #include "arm_compute/runtime/CL/CLBufferAllocator.h" #include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h" -#include "arm_compute/runtime/CL/CLTypes.h" #include "arm_compute/runtime/CL/CLTuner.h" +#include "arm_compute/runtime/CL/CLTypes.h" namespace arm_compute { @@ -70,6 +70,7 @@ public: Status validate_node(INode &node) override; std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; std::shared_ptr create_weights_manager() override; + void sync() override; private: int _context_count; /**< Counts how many contexts are currently using the backend */ diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h index 0b343c0d67..9cb37d4553 100644 --- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h +++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h @@ -52,6 +52,7 @@ public: Status validate_node(INode &node) override; std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; std::shared_ptr create_weights_manager() override; + void sync() override; private: Allocator _allocator; /**< Backend allocator */ diff --git a/src/graph/Tensor.cpp b/src/graph/Tensor.cpp index f69d49d016..3d4723430f 100644 --- a/src/graph/Tensor.cpp +++ b/src/graph/Tensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019,2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -80,20 +80,28 @@ bool Tensor::call_accessor() return false; } - // Map tensor - _handle->map(true); + const bool access_data = _accessor->access_tensor_data(); - // Return in case of null backend buffer - if(_handle->tensor().buffer() == nullptr) + if(access_data) { - return false; + // Map tensor + _handle->map(true); + + // Return in case of null backend buffer + if(_handle->tensor().buffer() == nullptr) + { + return false; + } } // Call accessor bool retval = _accessor->access_tensor(_handle->tensor()); - // Unmap tensor - _handle->unmap(); + if(access_data) + { + // Unmap tensor + _handle->unmap(); + } return retval; } diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp index 37af1bff93..ceb5ba95de 100644 --- a/src/graph/Utils.cpp +++ b/src/graph/Utils.cpp @@ -116,6 +116,17 @@ void release_default_graph_context(GraphContext &ctx) } } +void sync_backends() +{ + for(const auto &backend : backends::BackendRegistry::get().backends()) + { + if(backend.second->is_backend_supported()) + { + backend.second->sync(); + } + } +} + void setup_requested_backend_context(GraphContext &ctx, Target target) { if(backends::BackendRegistry::get().contains(target)) diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp index b6b25cc7d0..01e5ab1730 100644 --- a/src/graph/backends/CL/CLDeviceBackend.cpp +++ b/src/graph/backends/CL/CLDeviceBackend.cpp @@ -221,6 +221,11 @@ std::shared_ptr CLDeviceBackend::create_weights_ma auto weights_mgr = std::make_shared(); return weights_mgr; } + +void CLDeviceBackend::sync() +{ + CLScheduler::get().sync(); +} } // namespace backends } // namespace graph } // namespace arm_compute diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp index 9efa3ac0c8..47ef2c2394 100644 --- a/src/graph/backends/NEON/NEDeviceBackend.cpp +++ b/src/graph/backends/NEON/NEDeviceBackend.cpp @@ -175,6 +175,11 @@ std::shared_ptr 
NEDeviceBackend::create_weights_ma auto weights_mgr = std::make_shared(); return weights_mgr; } + +void NEDeviceBackend::sync() +{ + // nop +} } // namespace backends } // namespace graph } // namespace arm_compute diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp index 5be3706cfe..ac800df76c 100644 --- a/src/graph/detail/ExecutionHelpers.cpp +++ b/src/graph/detail/ExecutionHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -265,6 +265,8 @@ bool call_all_output_node_accessors(ExecutionWorkload &workload) is_valid = is_valid && valid_output; }); + sync_backends(); + return is_valid; } } // namespace detail diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp index 20e1369e73..c3f71299f6 100644 --- a/utils/GraphUtils.cpp +++ b/utils/GraphUtils.cpp @@ -162,6 +162,11 @@ DummyAccessor::DummyAccessor(unsigned int maximum) { } +bool DummyAccessor::access_tensor_data() +{ + return false; +} + bool DummyAccessor::access_tensor(ITensor &tensor) { ARM_COMPUTE_UNUSED(tensor); diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h index d2b05f27c6..80055acc0f 100644 --- a/utils/GraphUtils.h +++ b/utils/GraphUtils.h @@ -135,6 +135,7 @@ public: DummyAccessor(DummyAccessor &&) = default; // Inherited methods overriden: + bool access_tensor_data() override; bool access_tensor(ITensor &tensor) override; private: -- cgit v1.2.1