From 9c67d3883adaa8a65e3567579cf99937deaab21a Mon Sep 17 00:00:00 2001
From: Giorgio Arena <giorgio.arena@arm.com>
Date: Fri, 20 Aug 2021 15:24:03 +0100
Subject: Remove map/unmap overhead for input/output accessor when using
 DummyAccessor

Don't map/unmap when the tensor data is not accessed in the input or output accessor.
This avoids measuring CPU overhead when benchmarking on the GPU backend.

Resolve COMPMID-4712

Change-Id: I5baba1b93e7a51fe13525bcce6c0cfdecb14493e
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6140
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/graph/IDeviceBackend.h                |  4 +++-
 arm_compute/graph/ITensorAccessor.h               | 10 +++++++++-
 arm_compute/graph/Utils.h                         |  2 ++
 arm_compute/graph/backends/CL/CLDeviceBackend.h   |  3 ++-
 arm_compute/graph/backends/NEON/NEDeviceBackend.h |  1 +
 5 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'arm_compute/graph')
diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h
index 3a0a7e634e..f84aac0ae0 100644
--- a/arm_compute/graph/IDeviceBackend.h
+++ b/arm_compute/graph/IDeviceBackend.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019,2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -118,6 +118,8 @@ public:
      * @return Weights manager
      */
     virtual std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() = 0;
+    /** Synchronize kernel execution on the backend. On GPU, this results in a blocking call that waits for all kernels to complete. */
+    virtual void sync() = 0;
 };
 } // namespace backends
 } // namespace graph
diff --git a/arm_compute/graph/ITensorAccessor.h b/arm_compute/graph/ITensorAccessor.h
index 96bd499573..a8818be2e5 100644
--- a/arm_compute/graph/ITensorAccessor.h
+++ b/arm_compute/graph/ITensorAccessor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019,2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@ public:
      * @return True if access is successful else false
      */
     virtual bool access_tensor(ITensor &tensor) = 0;
+    /** Reports whether this accessor accesses the tensor data; this default implementation always returns true
+     *
+     * @return True if the tensor data is being accessed by the accessor. False otherwise
+     */
+    virtual bool access_tensor_data()
+    {
+        return true;
+    }
 };
 
 using ITensorAccessorUPtr = std::unique_ptr<ITensorAccessor>;
diff --git a/arm_compute/graph/Utils.h b/arm_compute/graph/Utils.h
index 6c3b76839e..43a8eca9e3 100644
--- a/arm_compute/graph/Utils.h
+++ b/arm_compute/graph/Utils.h
@@ -107,6 +107,8 @@ void setup_requested_backend_context(GraphContext &ctx, Target target);
  * @param[in,out] ctx Graph Context
  */
 void release_default_graph_context(GraphContext &ctx);
+/** Synchronize kernel execution on the backends. On GPU, this results in a blocking call that waits for all kernels to complete. */
+void sync_backends();
 /** Get size of a tensor's given dimension depending on its layout
  *
  * @param[in] descriptor            Descriptor
diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h
index 278a8e5031..63674ad794 100644
--- a/arm_compute/graph/backends/CL/CLDeviceBackend.h
+++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h
@@ -28,8 +28,8 @@
 
 #include "arm_compute/runtime/CL/CLBufferAllocator.h"
 #include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h"
-#include "arm_compute/runtime/CL/CLTypes.h"
 #include "arm_compute/runtime/CL/CLTuner.h"
+#include "arm_compute/runtime/CL/CLTypes.h"
 
 namespace arm_compute
 {
@@ -70,6 +70,7 @@ public:
     Status validate_node(INode &node) override;
     std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
     std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
+    void                                          sync() override;
 
 private:
     int                                _context_count;   /**< Counts how many contexts are currently using the backend */
diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
index 0b343c0d67..9cb37d4553 100644
--- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h
+++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
@@ -52,6 +52,7 @@ public:
     Status validate_node(INode &node) override;
     std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
     std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
+    void                                          sync() override;
 
 private:
     Allocator _allocator; /**< Backend allocator */
-- 
cgit v1.2.1