From be0ae93c50bfa3e588111585025278daa8cb0694 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 13 Mar 2018 13:08:12 +0000 Subject: COMPMID-1005: Update Depthwise Convolution form RSH Change-Id: I3033ddb8de183661010d6c71a83f71132037b139 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124338 Tested-by: Jenkins Reviewed-by: Pablo Tello --- .../NEON/kernels/convolution/common/profiler.hpp | 31 ++++++++++++++++------ .../kernels/convolution/common/tensor_utils.hpp | 9 ++++--- .../core/NEON/kernels/convolution/common/utils.hpp | 1 - 3 files changed, 29 insertions(+), 12 deletions(-) (limited to 'arm_compute/core/NEON/kernels/convolution/common') diff --git a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp b/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp index 01fafa9604..c6897e3771 100644 --- a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp +++ b/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp @@ -24,17 +24,21 @@ #pragma once +#include +#include +#include +#include + +#ifdef CYCLE_PROFILING #include #include -#include -#include #include #include #include #include #include "perf.h" -#include +#endif // CYCLE_PROFILING #ifdef CYCLE_PROFILING class EventIDContainer @@ -295,32 +299,43 @@ public: #endif // CYCLE_PROFILING template - void operator() (const char * event, - T func, - long int bytes_read = 0, - long int ops = 0, - long int bytes_written = 0) { + double operator() (const char * event, + T func, + long int bytes_read = 0, + long int ops = 0, + long int bytes_written = 0) { #ifdef CYCLE_PROFILING if (currentevent==maxevents) { + const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); func(); + const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(end - start).count(); } else { const auto countfd = thread_counter_fds.get_counter_fd(); + const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); start_counter(countfd); func(); long long cycs = stop_counter(countfd); + const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(end - start).count(); // Store the profiling data std::lock_guard lock_events(event_lock); events[currentevent++] = { get_event_id(event), bytes_read, ops, bytes_written, cycs }; + + return duration_us; } #else (void) event; (void) bytes_read; (void) ops; (void) bytes_written; + const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); func(); + const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(end - start).count(); #endif // CYCLE_PROFILING } }; diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp index 68a5c6a178..0c234431b1 100644 --- a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp +++ b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp @@ -30,9 +30,12 @@ void PrintTensor(const Tensor4D& tensor); void PrintWeights(const Tensor4D& weights); // Test the equivalence of two tensors -bool CmpTensors(const Tensor4D& a, - const Tensor4D& b, - const float max_delta=0.0f); +// Counts the instances that |a - b|/|a| > max_err +bool CmpTensors( + const Tensor4D& a, + const Tensor4D& b, + const float max_err=0.0f +); // Fill the tensor with a test pattern void TestPattern(Tensor4D& tensor); diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp index a22809fb58..5f42719119 100644 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp @@ -24,7 +24,6 @@ #pragma once -double TimeInUs(void); void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); inline int iceildiv(const int a, const int b) -- cgit v1.2.1