COMPMID-1005: Update Depthwise Convolution from RSH

Change-Id: I3033ddb8de183661010d6c71a83f71132037b139 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124338 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-03-13 13:08:12 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:49:16 +0000
commit: be0ae93c50bfa3e588111585025278daa8cb0694 (patch)
tree: d78c13e8846c31587a5acb70b38b13fa7d03200d /arm_compute/core/NEON/kernels/convolution/common
parent: ae4ce7b411d0f4809ac7d3d90fe89bdb2520dbf6 (diff)
download: ComputeLibrary-be0ae93c50bfa3e588111585025278daa8cb0694.tar.gz
3 files changed, 29 insertions, 12 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp b/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp
index 01fafa9604..c6897e3771 100644
--- a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp
@@ -24,17 +24,21 @@
 
 #pragma once
 
+#include <cstdio>
+#include <cstring>
+#include <chrono>
+#include <unistd.h>
+
+#ifdef CYCLE_PROFILING
 #include <algorithm>
 #include <cmath>
-#include <cstring>
-#include <cstdio>
 #include <map>
 #include <mutex>
 #include <thread>
 #include <vector>
 
 #include "perf.h"
-#include <unistd.h>
+#endif  // CYCLE_PROFILING
 
 #ifdef CYCLE_PROFILING
 class EventIDContainer
@@ -295,32 +299,43 @@ public:
 #endif  // CYCLE_PROFILING
 
     template <typename T>
-    void operator() (const char * event,
-                     T func,
-                     long int bytes_read = 0,
-                     long int ops = 0,
-                     long int bytes_written = 0) {
+    double operator() (const char * event,
+                       T func,
+                       long int bytes_read = 0,
+                       long int ops = 0,
+                       long int bytes_written = 0) {
 #ifdef CYCLE_PROFILING
         if (currentevent==maxevents) {
+            const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
             func();
+            const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
+            return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
         } else {
             const auto countfd = thread_counter_fds.get_counter_fd();
+            const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
             start_counter(countfd);
             func();
             long long cycs = stop_counter(countfd);
+            const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
+            const double duration_us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
 
             // Store the profiling data
             std::lock_guard<std::mutex> lock_events(event_lock);
             events[currentevent++] = {
               get_event_id(event), bytes_read, ops, bytes_written, cycs
             };
+
+            return duration_us;
         }
 #else
       (void) event;
       (void) bytes_read;
       (void) ops;
       (void) bytes_written;
+      const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
       func();
+      const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
+      return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
 #endif  // CYCLE_PROFILING
     }
 };
diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
index 68a5c6a178..0c234431b1 100644
--- a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp
@@ -30,9 +30,12 @@ void PrintTensor(const Tensor4D<Tensor4DShape, float>& tensor);
 void PrintWeights(const Tensor4D<KernelShape, float>& weights);
 
 // Test the equivalence of two tensors
-bool CmpTensors(const Tensor4D<Tensor4DShape, float>& a,
-                const Tensor4D<Tensor4DShape, float>& b,
-                const float max_delta=0.0f);
+// Counts the instances that |a - b|/|a| > max_err
+bool CmpTensors(
+  const Tensor4D<Tensor4DShape, float>& a,
+  const Tensor4D<Tensor4DShape, float>& b,
+  const float max_err=0.0f
+);
 
 // Fill the tensor with a test pattern
 void TestPattern(Tensor4D<Tensor4DShape, float>& tensor);
diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
index a22809fb58..5f42719119 100644
--- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
@@ -24,7 +24,6 @@
 
 #pragma once
 
-double TimeInUs(void);
 void PrintMatrix(const float *const m, const int M, const int N, const int row_stride);
 
 inline int iceildiv(const int a, const int b)
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-03-13 13:08:12 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:49:16 +0000
commit	be0ae93c50bfa3e588111585025278daa8cb0694 (patch)
tree	d78c13e8846c31587a5acb70b38b13fa7d03200d /arm_compute/core/NEON/kernels/convolution/common
parent	ae4ce7b411d0f4809ac7d3d90fe89bdb2520dbf6 (diff)
download	ComputeLibrary-be0ae93c50bfa3e588111585025278daa8cb0694.tar.gz