diff options
Diffstat (limited to 'source/application/main/Profiler.cc')
-rw-r--r-- | source/application/main/Profiler.cc | 238 |
1 files changed, 147 insertions, 91 deletions
diff --git a/source/application/main/Profiler.cc b/source/application/main/Profiler.cc index f364759..ce59d9c 100644 --- a/source/application/main/Profiler.cc +++ b/source/application/main/Profiler.cc @@ -17,25 +17,10 @@ #include "Profiler.hpp" #include <cstring> -#include <string> -#include <sstream> +#include <iomanip> namespace arm { namespace app { - - template<class T> - static void writeStatLine(std::ostringstream& s, - const char* desc, - T total, - uint32_t samples, - T min, - T max) - { - s << "\t" << desc << total << " / " - << ((double)total / samples) << " / " - << min << " / " << max << std::endl; - } - Profiler::Profiler(hal_platform* platform, const char* name = "Unknown") : _m_name(name) { @@ -90,99 +75,162 @@ namespace app { void Profiler::Reset() { this->_m_started = false; + this->_m_series.clear(); memset(&this->_m_tstampSt, 0, sizeof(this->_m_tstampSt)); memset(&this->_m_tstampEnd, 0, sizeof(this->_m_tstampEnd)); } - std::string Profiler::GetResultsAndReset() + void calcProfilingStat(uint64_t currentValue, + Statistics& data, + uint32_t samples) { - std::ostringstream strResults; + data.total += currentValue; + data.min = std::min(data.min, currentValue); + data.max = std::max(data.max, currentValue); + data.avrg = ((double)data.total / samples); + } + void Profiler::GetAllResultsAndReset(std::vector<ProfileResult>& results) + { for (const auto& item: this->_m_series) { auto name = item.first; ProfilingSeries series = item.second; - - uint32_t samplesNum = series.size(); - - uint64_t totalNpuCycles = 0; /* Total NPU cycles (idle + active). */ - uint64_t totalActiveNpuCycles = 0; /* Active NPU cycles. */ - uint64_t totalCpuCycles = 0; /* Total CPU cycles. */ - time_t totalTimeMs = 0; - - uint64_t minActiveNpuCycles = series[0].activeNpuCycles; - uint64_t minIdleNpuCycles = series[0].npuCycles - minActiveNpuCycles; - uint64_t minActiveCpuCycles = series[0].cpuCycles - minActiveNpuCycles; - time_t minTimeMs = series[0].time; - - uint64_t maxIdleNpuCycles = 0; - uint64_t maxActiveNpuCycles = 0; - uint64_t maxActiveCpuCycles = 0; - time_t maxTimeMs = 0; - + ProfileResult result{}; + result.name = item.first; + result.samplesNum = series.size(); + + Statistics AXI0_RD { + .name = "NPU AXI0_RD_DATA_BEAT_RECEIVED", + .unit = "cycles", + .total = 0, + .avrg = 0.0, + .min = series[0].axi0writes, + .max = 0 + }; + Statistics AXI0_WR { + .name = "NPU AXI0_WR_DATA_BEAT_WRITTEN", + .unit = "cycles", + .total = 0, + .avrg = 0.0, + .min = series[0].axi0reads, + .max = 0 + }; + Statistics AXI1_RD { + .name = "NPU AXI1_RD_DATA_BEAT_RECEIVED", + .unit = "cycles", + .total = 0, + .avrg = 0.0, + .min = series[0].axi1reads, + .max = 0 + }; + Statistics NPU_ACTIVE { + .name = "NPU ACTIVE", + .unit = "cycles", + .total = 0, + .avrg = 0.0, + .min = series[0].activeNpuCycles, + .max = 0 + }; + Statistics NPU_IDLE { + .name = "NPU IDLE", + .unit = "cycles", + .total = 0, + .avrg = 0.0, + .min = series[0].idleNpuCycles, + .max = 0 + }; + Statistics NPU_Total { + .name = "NPU total", + .unit = "cycles", + .total = 0, + .avrg = 0.0, + .min = series[0].npuCycles, + .max = 0, + }; +#if defined(CPU_PROFILE_ENABLED) + Statistics CPU_ACTIVE { + .name = "CPU ACTIVE", + .unit = "cycles (approx)", + .total = 0, + .avrg = 0.0, + .min = series[0].cpuCycles - NPU_ACTIVE.min, + .max = 0 + }; + Statistics TIME { + .name = "Time", + .unit = "ms", + .total = 0, + .avrg = 0.0, + .min = static_cast<uint64_t>(series[0].time), + .max = 0 + }; +#endif for(ProfilingUnit& unit: series){ - totalNpuCycles += unit.npuCycles; - totalActiveNpuCycles += unit.activeNpuCycles; - totalCpuCycles += unit.cpuCycles; - totalTimeMs += unit.time; - - maxActiveNpuCycles = std::max(maxActiveNpuCycles, - unit.activeNpuCycles); - maxIdleNpuCycles = std::max(maxIdleNpuCycles, - unit.npuCycles - maxActiveNpuCycles); - maxActiveCpuCycles = std::max(maxActiveCpuCycles, - unit.cpuCycles - maxActiveNpuCycles); - maxTimeMs = std::max(maxTimeMs, unit.time); - - minActiveNpuCycles = std::min(minActiveNpuCycles, - unit.activeNpuCycles); - minIdleNpuCycles = std::min(minIdleNpuCycles, - unit.npuCycles - minActiveNpuCycles); - minActiveCpuCycles = std::min(minActiveCpuCycles, - unit.cpuCycles - minActiveNpuCycles); - minTimeMs = std::min(minTimeMs, unit.time); - } - strResults << "Profile for " << name << ": " << std::endl; + calcProfilingStat(unit.npuCycles, + NPU_Total, result.samplesNum); - if (samplesNum > 1) { - strResults << "\tSamples: " << samplesNum << std::endl; - strResults << "\t Total / Avg./ Min / Max" - << std::endl; + calcProfilingStat(unit.activeNpuCycles, + NPU_ACTIVE, result.samplesNum); - writeStatLine<uint64_t>(strResults, "Active NPU cycles: ", - totalActiveNpuCycles, samplesNum, - minActiveNpuCycles, maxActiveNpuCycles); + calcProfilingStat(unit.idleNpuCycles, + NPU_IDLE, result.samplesNum); - writeStatLine<uint64_t>(strResults, "Idle NPU cycles: ", - (totalNpuCycles - totalActiveNpuCycles), - samplesNum, minIdleNpuCycles, maxIdleNpuCycles); + calcProfilingStat(unit.axi0writes, + AXI0_WR, result.samplesNum); -#if defined(CPU_PROFILE_ENABLED) - writeStatLine<uint64_t>(strResults, "Active CPU cycles (approx): ", - (totalCpuCycles - totalActiveNpuCycles), - samplesNum, minActiveCpuCycles, - maxActiveCpuCycles); + calcProfilingStat(unit.axi0reads, + AXI0_RD, result.samplesNum); - writeStatLine<time_t>(strResults, "Time in ms: ", - totalTimeMs, samplesNum, minTimeMs, maxTimeMs); -#endif - } else { - strResults << "\tActive NPU cycles: " << totalActiveNpuCycles - << std::endl; - strResults << "\tIdle NPU cycles: " - << (totalNpuCycles - totalActiveNpuCycles) - << std::endl; + calcProfilingStat(unit.axi1reads, + AXI1_RD, result.samplesNum); #if defined(CPU_PROFILE_ENABLED) - strResults << "\tActive CPU cycles: " - << (totalCpuCycles - totalActiveNpuCycles) - << " (approx)" << std::endl; + calcProfilingStat(static_cast<uint64_t>(unit.time), + TIME, result.samplesNum); - strResults << "\tTime in ms: " << totalTimeMs << std::endl; + calcProfilingStat(unit.cpuCycles - unit.activeNpuCycles, + CPU_ACTIVE, result.samplesNum); #endif } + result.data.emplace_back(AXI0_RD); + result.data.emplace_back(AXI0_WR); + result.data.emplace_back(AXI1_RD); + result.data.emplace_back(NPU_ACTIVE); + result.data.emplace_back(NPU_IDLE); + result.data.emplace_back(NPU_Total); +#if defined(CPU_PROFILE_ENABLED) + result.data.emplace_back(CPU_ACTIVE); + result.data.emplace_back(TIME); +#endif + results.emplace_back(result); } this->Reset(); - return strResults.str(); + } + + void printStatisticsHeader(uint32_t samplesNum) { + info("Number of samples: %i\n", samplesNum); + info("%s\n", "Total / Avg./ Min / Max"); + } + + void Profiler::PrintProfilingResult(bool printFullStat) { + std::vector<ProfileResult> results{}; + GetAllResultsAndReset(results); + for(ProfileResult& result: results) { + info("Profile for %s:\n", result.name.c_str()); + + if (printFullStat) { + printStatisticsHeader(result.samplesNum); + } + + for (Statistics &stat: result.data) { + if (printFullStat) { + info("%s %s: %llu / %.0f / %llu / %llu \n", stat.name.c_str(), stat.unit.c_str(), + stat.total, stat.avrg, stat.min, stat.max); + } else { + info("%s %s: %.0f\n", stat.name.c_str(), stat.unit.c_str(), stat.avrg); + } + } + } } void Profiler::SetName(const char* str) @@ -197,11 +245,19 @@ namespace app { struct ProfilingUnit unit; - if (timer->cap.npu_cycles && timer->get_npu_total_cycle_diff && - timer->get_npu_active_cycle_diff) + if (timer->cap.npu_cycles && timer->get_npu_cycles_diff) { - unit.npuCycles = timer->get_npu_total_cycle_diff(&start, &end); - unit.activeNpuCycles = timer->get_npu_active_cycle_diff(&start, &end); + const size_t size = 6; + uint64_t pmuCounters[size] = {0}; + /* 6 values: total cc, active cc, idle cc, axi0 read, axi0 write, axi1 read*/ + if (0 == timer->get_npu_cycles_diff(&start, &end, pmuCounters, size)) { + unit.npuCycles = pmuCounters[0]; + unit.activeNpuCycles = pmuCounters[1]; + unit.idleNpuCycles = pmuCounters[2]; + unit.axi0reads = pmuCounters[3]; + unit.axi0writes = pmuCounters[4]; + unit.axi1reads = pmuCounters[5]; + } } if (timer->cap.cpu_cycles && timer->get_cpu_cycle_diff) { |