From b3b1e0b3e349ae607297bbca3f273d3d0dd19679 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 27 Apr 2018 18:49:44 +0100 Subject: COMPMID-1010: Remove RSH profiler header Change-Id: I2967ec94c3bead0b92ff1d1581ff6afea21c7f04 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129405 Tested-by: Jenkins Reviewed-by: Michalis Spyrou --- .../NEON/kernels/convolution/common/profiler.hpp | 341 --------------------- .../kernels/convolution/winograd/winograd_gemm.hpp | 3 - 2 files changed, 344 deletions(-) delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/profiler.hpp (limited to 'arm_compute/core/NEON/kernels/convolution') diff --git a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp b/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp deleted file mode 100644 index c6897e3771..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/profiler.hpp +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include -#include -#include -#include - -#ifdef CYCLE_PROFILING -#include -#include -#include -#include -#include -#include - -#include "perf.h" -#endif // CYCLE_PROFILING - -#ifdef CYCLE_PROFILING -class EventIDContainer -{ - public: - EventIDContainer() : container_lock(), event_ids() - { - } - - int get_event_id(const char *id) - { - std::lock_guard lock(container_lock); - if (!event_ids.count(id)) { - event_ids.emplace(id, event_ids.size()); - } - return event_ids[id]; - } - - unsigned int size() const - { - return event_ids.size(); - } - - auto begin() - { - return event_ids.begin(); - } - - auto end() - { - return event_ids.end(); - } - - private: - std::mutex container_lock; - std::map event_ids; -}; - - -class ThreadEventCounterContainer -{ - public: - ThreadEventCounterContainer() : container_lock(), thread_counter_fds() - { - } - - int get_counter_fd() - { - const auto id = std::this_thread::get_id(); - std::lock_guard lock(container_lock); - if (!thread_counter_fds.count(id)) - { - thread_counter_fds.emplace(id, open_cycle_counter()); - } - return thread_counter_fds[id]; - } - - ~ThreadEventCounterContainer() - { - // Close all counter file descriptors - for (auto& fd : thread_counter_fds) - { - close(fd.second); - } - } - - private: - std::mutex container_lock; - std::map thread_counter_fds; -}; -#endif // CYCLE_PROFILING - - -class profiler { -private: -#ifdef CYCLE_PROFILING - struct ProfileEntry { - int event_id; - long int bytes_read, ops, bytes_written; - long int duration; - }; - - static const int maxevents = 10000; - ProfileEntry events[maxevents]; - int currentevent; - std::mutex event_lock; - - EventIDContainer event_ids; - ThreadEventCounterContainer thread_counter_fds; - - int get_event_id(const char *id) - { - return event_ids.get_event_id(id); - } -#endif // CYCLE_PROFILING - -public: -#ifdef CYCLE_PROFILING - profiler() : - currentevent(0), - event_lock(), - event_ids(), - thread_counter_fds() - { - } - - ~profiler() { - std::lock_guard lock_events(event_lock); - - // Compute performance from recorded events - struct ProfileResult { - ProfileResult() : total_calls(0), - total_duration(0), - total_bytes_read(0), - total_ops(0), - total_bytes_written(0) { - } - - void operator+=(const ProfileEntry &rhs) { - total_calls++; - total_duration += rhs.duration; - total_bytes_read += rhs.bytes_read; - total_ops += rhs.ops; - total_bytes_written = rhs.bytes_written; - } - - float avg_duration(void) const { - return static_cast(total_duration) / - static_cast(total_calls); - } - - float bytes_read_per_cycle(void) const { - return static_cast(total_bytes_read) / - static_cast(total_duration); - } - - float ops_per_cycle(void) const { - return static_cast(total_ops) / - static_cast(total_duration); - } - - float bytes_written_per_cycle(void) const { - return static_cast(total_bytes_written) / - static_cast(total_duration); - } - - long int total_calls, - total_duration, - total_bytes_read, - total_ops, - total_bytes_written; - }; - - std::vector totals; - totals.resize(event_ids.size()); - for (int i = 0; i < currentevent; i++) { - const auto &event = events[i]; - totals[event.event_id] += event; - } - - // Get the longest label - int len_label = 0; - for (const auto &kv : event_ids) { - len_label = std::max(len_label, static_cast(strlen(kv.first))); - } - - // Get the longest values for every other field - const auto get_length_of_field = - [totals] (const char *title, auto f, auto len) -> size_t { - size_t l = strlen(title); - for (const auto &v : totals) { - l = std::max(l, len(f(v))); - } - return l; - }; - - // Get the strlen for an int - const auto intlen = [] (long int x) -> size_t { - size_t len = 0; - do { - x /= 10; - len++; - } while (x); - return len; - }; - - // Get the strlen for a float - const auto floatlen = [] (const int precision) { - return [precision] (float x) { - size_t len = 0; - - if (!std::isfinite(x)) { - return static_cast(3); - } - - do { - x /= 10.0f; - len++; - } while (x > 1.0f); - return len + 1 + precision; - }; - }; - - const int len_calls = get_length_of_field( - "Calls", [] (const auto &v) {return v.total_calls;}, - intlen - ); - const int len_duration = get_length_of_field( - "Duration", [] (const auto &v) {return v.total_duration;}, - intlen - ); - const int len_average_duration = get_length_of_field( - "Average", [] (const auto &v) {return v.avg_duration();}, - floatlen(2) - ); - const int len_reads_per_cycle = get_length_of_field( - "Reads / cycle", - [] (const auto &v) {return v.bytes_read_per_cycle();}, - floatlen(6) - ); - const int len_ops_per_cycle = get_length_of_field( - "Ops / cycle", - [] (const auto &v) {return v.ops_per_cycle();}, - floatlen(6) - ); - const int len_writes_per_cycle = get_length_of_field( - "Writes / cycle", - [] (const auto &v) {return v.bytes_written_per_cycle();}, - floatlen(6) - ); - - // Print header - printf( - "%*s %*s %*s %*s %*s %*s %*s\n", - len_label, "", - len_calls, "Calls", - len_duration, "Duration", - len_average_duration, "Average", - len_reads_per_cycle, "Reads / cycle", - len_ops_per_cycle, "Ops / cycle", - len_writes_per_cycle, "Writes / cycle" - ); - for (const auto &kv : event_ids) { - const auto id = kv.second; - printf( - "%*s %*ld %*ld %*.2f %*.6f %*.6f %*.6f\n", - len_label, kv.first, - len_calls, totals[id].total_calls, - len_duration, totals[id].total_duration, - len_average_duration, totals[id].avg_duration(), - len_reads_per_cycle, totals[id].bytes_read_per_cycle(), - len_ops_per_cycle, totals[id].ops_per_cycle(), - len_writes_per_cycle, totals[id].bytes_written_per_cycle() - ); - } - printf("\n"); - } -#endif // CYCLE_PROFILING - - template - double operator() (const char * event, - T func, - long int bytes_read = 0, - long int ops = 0, - long int bytes_written = 0) { -#ifdef CYCLE_PROFILING - if (currentevent==maxevents) { - const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); - func(); - const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(end - start).count(); - } else { - const auto countfd = thread_counter_fds.get_counter_fd(); - const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); - start_counter(countfd); - func(); - long long cycs = stop_counter(countfd); - const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(end - start).count(); - - // Store the profiling data - std::lock_guard lock_events(event_lock); - events[currentevent++] = { - get_event_id(event), bytes_read, ops, bytes_written, cycs - }; - - return duration_us; - } -#else - (void) event; - (void) bytes_read; - (void) ops; - (void) bytes_written; - const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); - func(); - const std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(end - start).count(); -#endif // CYCLE_PROFILING - } -}; diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp index f3b2bb10ed..dd67e97035 100644 --- a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp +++ b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp @@ -27,7 +27,6 @@ #include "arm_compute/core/NEON/kernels/convolution/common/alloc.hpp" #include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp" #include "gemm.hpp" -#include "arm_compute/core/NEON/kernels/convolution/common/profiler.hpp" #include "arm_compute/core/NEON/kernels/convolution/common/shims.hpp" #include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp" #include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp" @@ -439,8 +438,6 @@ class WinogradGEMM const int tile_rows; /** Number of rows of tiles. */ const int tile_cols; /** Number of columns of tiles. */ const int M, K, N; /** Sizes of underlying fundamental matrix multiplications. */ - - profiler prof; }; }; -- cgit v1.2.1