aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core/NEON/kernels/convolution/common/profiler.hpp')
-rw-r--r--arm_compute/core/NEON/kernels/convolution/common/profiler.hpp31
1 files changed, 23 insertions, 8 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp b/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp
index 01fafa9604..c6897e3771 100644
--- a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp
@@ -24,17 +24,21 @@
#pragma once
+#include <cstdio>
+#include <cstring>
+#include <chrono>
+#include <unistd.h>
+
+#ifdef CYCLE_PROFILING
#include <algorithm>
#include <cmath>
-#include <cstring>
-#include <cstdio>
#include <map>
#include <mutex>
#include <thread>
#include <vector>
#include "perf.h"
-#include <unistd.h>
+#endif // CYCLE_PROFILING
#ifdef CYCLE_PROFILING
class EventIDContainer
@@ -295,32 +299,43 @@ public:
#endif // CYCLE_PROFILING
template <typename T>
- void operator() (const char * event,
- T func,
- long int bytes_read = 0,
- long int ops = 0,
- long int bytes_written = 0) {
+ double operator() (const char * event,
+ T func,
+ long int bytes_read = 0,
+ long int ops = 0,
+ long int bytes_written = 0) {
#ifdef CYCLE_PROFILING
if (currentevent==maxevents) {
+ const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
func();
+ const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
+ return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
} else {
const auto countfd = thread_counter_fds.get_counter_fd();
+ const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
start_counter(countfd);
func();
long long cycs = stop_counter(countfd);
+ const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
+ return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
// Store the profiling data
std::lock_guard<std::mutex> lock_events(event_lock);
events[currentevent++] = {
get_event_id(event), bytes_read, ops, bytes_written, cycs
};
+
+ return duration_us;
}
#else
(void) event;
(void) bytes_read;
(void) ops;
(void) bytes_written;
+ const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
func();
+ const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
+ return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
#endif // CYCLE_PROFILING
}
};