aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-08-20 15:24:03 +0100
committerGiorgio Arena <giorgio.arena@arm.com>2021-08-24 11:46:13 +0000
commit9c67d3883adaa8a65e3567579cf99937deaab21a (patch)
treee3b11c5918b4121e8e7d5ad728847ce74a8ed77f
parent87a74effff65f6fa1b0e565818e02c3b414ae1cf (diff)
downloadComputeLibrary-9c67d3883adaa8a65e3567579cf99937deaab21a.tar.gz
Remove map/unmap overhead for input/output accessor when using DummyAccessor
Don't map/unmap the tensor when its data is not accessed by the input or output accessor. This avoids measuring CPU overhead when benchmarking on the GPU backend. Resolve COMPMID-4712 Change-Id: I5baba1b93e7a51fe13525bcce6c0cfdecb14493e Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6140 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/graph/IDeviceBackend.h4
-rw-r--r--arm_compute/graph/ITensorAccessor.h10
-rw-r--r--arm_compute/graph/Utils.h2
-rw-r--r--arm_compute/graph/backends/CL/CLDeviceBackend.h3
-rw-r--r--arm_compute/graph/backends/NEON/NEDeviceBackend.h1
-rw-r--r--src/graph/Tensor.cpp24
-rw-r--r--src/graph/Utils.cpp11
-rw-r--r--src/graph/backends/CL/CLDeviceBackend.cpp5
-rw-r--r--src/graph/backends/NEON/NEDeviceBackend.cpp5
-rw-r--r--src/graph/detail/ExecutionHelpers.cpp4
-rw-r--r--utils/GraphUtils.cpp5
-rw-r--r--utils/GraphUtils.h1
12 files changed, 63 insertions, 12 deletions
diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h
index 3a0a7e634e..f84aac0ae0 100644
--- a/arm_compute/graph/IDeviceBackend.h
+++ b/arm_compute/graph/IDeviceBackend.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019,2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -118,6 +118,8 @@ public:
* @return Weights manager
*/
virtual std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() = 0;
+ /** Synchronize kernels execution on the backend. On GPU, this results in a blocking call waiting for all kernels to be completed. */
+ virtual void sync() = 0;
};
} // namespace backends
} // namespace graph
diff --git a/arm_compute/graph/ITensorAccessor.h b/arm_compute/graph/ITensorAccessor.h
index 96bd499573..a8818be2e5 100644
--- a/arm_compute/graph/ITensorAccessor.h
+++ b/arm_compute/graph/ITensorAccessor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019,2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,14 @@ public:
* @return True if access is successful else false
*/
virtual bool access_tensor(ITensor &tensor) = 0;
+ /** Returns true if the tensor data is being accessed
+ *
+ * @return True if the tensor data is being accessed by the accessor. False otherwise
+ */
+ virtual bool access_tensor_data()
+ {
+ return true;
+ }
};
using ITensorAccessorUPtr = std::unique_ptr<ITensorAccessor>;
diff --git a/arm_compute/graph/Utils.h b/arm_compute/graph/Utils.h
index 6c3b76839e..43a8eca9e3 100644
--- a/arm_compute/graph/Utils.h
+++ b/arm_compute/graph/Utils.h
@@ -107,6 +107,8 @@ void setup_requested_backend_context(GraphContext &ctx, Target target);
* @param[in,out] ctx Graph Context
*/
void release_default_graph_context(GraphContext &ctx);
+/** Synchronize kernels execution on the backends. On GPU, this results in a blocking call waiting for all kernels to be completed. */
+void sync_backends();
/** Get size of a tensor's given dimension depending on its layout
*
* @param[in] descriptor Descriptor
diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h
index 278a8e5031..63674ad794 100644
--- a/arm_compute/graph/backends/CL/CLDeviceBackend.h
+++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h
@@ -28,8 +28,8 @@
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h"
-#include "arm_compute/runtime/CL/CLTypes.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "arm_compute/runtime/CL/CLTypes.h"
namespace arm_compute
{
@@ -70,6 +70,7 @@ public:
Status validate_node(INode &node) override;
std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
+ void sync() override;
private:
int _context_count; /**< Counts how many contexts are currently using the backend */
diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
index 0b343c0d67..9cb37d4553 100644
--- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h
+++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h
@@ -52,6 +52,7 @@ public:
Status validate_node(INode &node) override;
std::shared_ptr<arm_compute::IMemoryManager> create_memory_manager(MemoryManagerAffinity affinity) override;
std::shared_ptr<arm_compute::IWeightsManager> create_weights_manager() override;
+ void sync() override;
private:
Allocator _allocator; /**< Backend allocator */
diff --git a/src/graph/Tensor.cpp b/src/graph/Tensor.cpp
index f69d49d016..3d4723430f 100644
--- a/src/graph/Tensor.cpp
+++ b/src/graph/Tensor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019,2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -80,20 +80,28 @@ bool Tensor::call_accessor()
return false;
}
- // Map tensor
- _handle->map(true);
+ const bool access_data = _accessor->access_tensor_data();
- // Return in case of null backend buffer
- if(_handle->tensor().buffer() == nullptr)
+ if(access_data)
{
- return false;
+ // Map tensor
+ _handle->map(true);
+
+ // Return in case of null backend buffer
+ if(_handle->tensor().buffer() == nullptr)
+ {
+ return false;
+ }
}
// Call accessor
bool retval = _accessor->access_tensor(_handle->tensor());
- // Unmap tensor
- _handle->unmap();
+ if(access_data)
+ {
+ // Unmap tensor
+ _handle->unmap();
+ }
return retval;
}
diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp
index 37af1bff93..ceb5ba95de 100644
--- a/src/graph/Utils.cpp
+++ b/src/graph/Utils.cpp
@@ -116,6 +116,17 @@ void release_default_graph_context(GraphContext &ctx)
}
}
+void sync_backends()
+{
+ for(const auto &backend : backends::BackendRegistry::get().backends())
+ {
+ if(backend.second->is_backend_supported())
+ {
+ backend.second->sync();
+ }
+ }
+}
+
void setup_requested_backend_context(GraphContext &ctx, Target target)
{
if(backends::BackendRegistry::get().contains(target))
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index b6b25cc7d0..01e5ab1730 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -221,6 +221,11 @@ std::shared_ptr<arm_compute::IWeightsManager> CLDeviceBackend::create_weights_ma
auto weights_mgr = std::make_shared<IWeightsManager>();
return weights_mgr;
}
+
+void CLDeviceBackend::sync()
+{
+ CLScheduler::get().sync();
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index 9efa3ac0c8..47ef2c2394 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -175,6 +175,11 @@ std::shared_ptr<arm_compute::IWeightsManager> NEDeviceBackend::create_weights_ma
auto weights_mgr = std::make_shared<IWeightsManager>();
return weights_mgr;
}
+
+void NEDeviceBackend::sync()
+{
+ // nop
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index 5be3706cfe..ac800df76c 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -265,6 +265,8 @@ bool call_all_output_node_accessors(ExecutionWorkload &workload)
is_valid = is_valid && valid_output;
});
+ sync_backends();
+
return is_valid;
}
} // namespace detail
diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp
index 20e1369e73..c3f71299f6 100644
--- a/utils/GraphUtils.cpp
+++ b/utils/GraphUtils.cpp
@@ -162,6 +162,11 @@ DummyAccessor::DummyAccessor(unsigned int maximum)
{
}
+bool DummyAccessor::access_tensor_data()
+{
+ return false;
+}
+
bool DummyAccessor::access_tensor(ITensor &tensor)
{
ARM_COMPUTE_UNUSED(tensor);
diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h
index d2b05f27c6..80055acc0f 100644
--- a/utils/GraphUtils.h
+++ b/utils/GraphUtils.h
@@ -135,6 +135,7 @@ public:
DummyAccessor(DummyAccessor &&) = default;
// Inherited methods overriden:
+ bool access_tensor_data() override;
bool access_tensor(ITensor &tensor) override;
private: