diff options
Diffstat (limited to 'src/armnn/Profiling.cpp')
-rw-r--r-- | src/armnn/Profiling.cpp | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/src/armnn/Profiling.cpp b/src/armnn/Profiling.cpp new file mode 100644 index 0000000000..15a195e6bd --- /dev/null +++ b/src/armnn/Profiling.cpp @@ -0,0 +1,293 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "Profiling.hpp" + +#if ARMNN_PROFILING_ENABLED + +#if ARMNN_STREAMLINE_ENABLED +#include <streamline_annotate.h> +#endif + +#if ARMCOMPUTECL_ENABLED +#include <arm_compute/runtime/CL/CLFunctions.h> +#endif + +#include <algorithm> +#include <iomanip> +#include <iostream> +#include <map> +#include <stack> +#include <boost/algorithm/string.hpp> + +namespace armnn +{ + +// Controls the amount of memory initially allocated to store profiling events. +// If chosen carefully, the profiling system will not make any additional allocations, thus minimizing its impact on +// measured times. +constexpr std::size_t g_ProfilingEventCountHint = 1024; + +// Whether profiling reports should include the sequence of events together with their timings. +constexpr bool g_WriteProfilingEventSequence = true; + +// Whether profiling reports should also report detailed information on events grouped by tag. +// This is used to group stats per inference (see usage of ARMNN_UPDATE_PROFILING_EVENT_TAG in +// Runtime::EnqueueWorkload). This can spam the output stream, so use carefully (or adapt +// the code to just output information for a tag of interest). +constexpr bool g_AggregateProfilingEventsByTag = false; + +// Whether a call to Profiler::AnalyzeEventsAndWriteResults() will be made when the Profiler +// singleton is destroyed. It can be convenient for local tests. +constexpr bool g_WriteReportToStdOutOnProfilerDestruction = true; + +// Whether events denoting operations running on the GPU should force a sync before/after the event. +// This is hardcoded to true for now as the profiling timings are not very useful without it. +constexpr bool g_ProfilingForceGpuSync = true; + +std::map<std::string, Profiler::ProfilingEventStats> Profiler::CalculateProfilingEventStats() const +{ + std::map<std::string, ProfilingEventStats> nameToStatsMap; + + for (auto&& event : m_EventSequence) + { + auto mapIter = nameToStatsMap.find(event.m_Label); + if (mapIter != nameToStatsMap.end()) + { + ProfilingEventStats& stats = mapIter->second; + stats.m_TotalMs += event.DurationMs(); + stats.m_MinMs = std::min(stats.m_MinMs, event.DurationMs()); + stats.m_MaxMs = std::max(stats.m_MaxMs, event.DurationMs()); + ++stats.m_Count; + } + else + { + ProfilingEventStats stats; + stats.m_TotalMs = event.DurationMs(); + stats.m_MinMs = event.DurationMs(); + stats.m_MaxMs = event.DurationMs(); + stats.m_Count = 1; + + nameToStatsMap[event.m_Label] = stats; + } + } + + return nameToStatsMap; +} + +void Profiler::AnalyzeEventSequenceAndWriteResults(std::vector<ProfilingEvent>::const_iterator first, + std::vector<ProfilingEvent>::const_iterator last, + std::ostream& outStream) const +{ + // Output event sequence, if needed + if (g_WriteProfilingEventSequence) + { + // Make sure timestamps are output with 6 decimals, and save old settings + std::streamsize oldPrecision = outStream.precision(); + outStream.precision(6); + std::ios_base::fmtflags oldFlags = outStream.flags(); + outStream.setf(std::ios::fixed); + // Output fields + outStream << "Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl; + for (auto event = first; event != last; ++event) + { + std::chrono::duration<double, std::milli> startTimeMs = event->m_StartTime.time_since_epoch(); + std::chrono::duration<double, std::milli> stopTimeMs = event->m_StopTime.time_since_epoch(); + + outStream << std::setw(50) << event->m_Label << " " + << std::setw(20) << event->DurationMs() + << std::setw(20) << startTimeMs.count() + << std::setw(20) << stopTimeMs.count() + << std::setw(20) << Profiler::Get().GetEventComputeDevice(event->m_Device) + << std::endl; + } + outStream << std::endl; + // Restore previous precision settings + outStream.flags(oldFlags); + outStream.precision(oldPrecision); + } + + // Aggregate results per event name + std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats(); + + // Output aggregated stats + outStream << "Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl; + for (const auto& pair : nameToStatsMap) + { + const std::string& eventLabel = pair.first; + const ProfilingEventStats& eventStats = pair.second; + const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count); + + outStream << "\t" << std::setw(50) << eventLabel << " " << std::setw(9) << avgMs << " " + << std::setw(9) << eventStats.m_MinMs << " " << std::setw(9) << eventStats.m_MaxMs << " " + << std::setw(9) << eventStats.m_TotalMs << " " << std::setw(9) << eventStats.m_Count << std::endl; + } + outStream << std::endl; +} + +Profiler Profiler::s_Instance; + +Profiler::Profiler() + : m_EventTag(0) + , m_NestingLevel(0) + , m_EventTagUpdated(false) +{ + m_EventSequence.reserve(g_ProfilingEventCountHint); + +#if ARMNN_STREAMLINE_ENABLED + // Initialise streamline annotations + ANNOTATE_SETUP; +#endif +} + +Profiler::~Profiler() +{ + if (g_WriteReportToStdOutOnProfilerDestruction) + { + AnalyzeEventsAndWriteResults(std::cout); + } +} + +void Profiler::BeginEvent(Compute compute, const std::string label) +{ + // We need to sync just before the begin event to not include time before the period we want to time. + WaitForDevice(compute); + + const TimePoint timeStamp = Clock::now(); + m_ObservedMarkers.emplace(Marker{m_EventSequence.size(), label, timeStamp, compute, m_EventTag}); + m_EventSequence.emplace_back(); + +#if ARMNN_STREAMLINE_ENABLED + ANNOTATE_CHANNEL_COLOR(m_NestingLevel, GetEventColor(compute), label.c_str()); +#endif + + m_NestingLevel++; +} + +void Profiler::EndEvent(Compute compute) +{ + // We need to sync just before the end event to include all the time of the timed period. + WaitForDevice(compute); + + const Marker& marker = m_ObservedMarkers.top(); + + const TimePoint startTime = marker.m_TimeStamp; + const TimePoint stopTime = Clock::now(); + + m_EventSequence[marker.m_Id] = {std::move(marker.m_EventName), + startTime, + stopTime, + marker.m_ComputeDevice, + marker.m_Tag}; + + m_ObservedMarkers.pop(); + +#if ARMNN_STREAMLINE_ENABLED + ANNOTATE_CHANNEL_END(m_NestingLevel); +#endif + + m_NestingLevel--; +} + +void Profiler::AnalyzeEventsAndWriteResults(std::ostream& outStream) const +{ + // Stack should be empty now. + const bool saneMarkerSequence = m_ObservedMarkers.empty(); + + // Abort if the sequence of markers was found to have incorrect information: + // The stats cannot be trusted. + if (!saneMarkerSequence) + { + outStream << "Cannot write profiling stats. " + "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly " + "wrong stats. The profiling system may contain implementation issues or could have been used in an " + "unsafe manner." << std::endl; + return; + } + + // Analyze the full sequence of events + AnalyzeEventSequenceAndWriteResults(m_EventSequence.begin(), m_EventSequence.end(), outStream); + + // Aggregate events by tag if requested (spams the output stream if done for all tags) + if (m_EventTagUpdated && g_AggregateProfilingEventsByTag) + { + outStream << std::endl; + outStream << "***" << std::endl; + outStream << "*** Per Tag Stats" << std::endl; + outStream << "***" << std::endl; + outStream << std::endl; + + for (auto iter = m_EventSequence.begin(); iter != m_EventSequence.end();) + { + const uint32_t tag = iter->m_Tag; + + // Advance iter until we find the first non-matching tag + auto tagEndIter = iter; + for (; tagEndIter != m_EventSequence.end(); ++tagEndIter) + { + if (tagEndIter->m_Tag != tag) + { + break; + } + } + + outStream << "> Begin Tag: " << tag << std::endl; + outStream << std::endl; + AnalyzeEventSequenceAndWriteResults(iter, tagEndIter, outStream); + outStream << std::endl; + outStream << "> End Tag: " << tag << std::endl; + + iter = tagEndIter; + } + } +} + +void Profiler::WaitForDevice(Compute compute) const +{ +#if ARMCOMPUTECL_ENABLED + if(compute == Compute::GpuAcc && g_ProfilingForceGpuSync) + { + arm_compute::CLScheduler::get().sync(); + } +#endif +} + +const char* Profiler::GetEventComputeDevice(Compute compute) const +{ + switch(compute) + { + case Compute::CpuRef: + return "CpuRef"; + case Compute::CpuAcc: + return "CpuAcc"; + case Compute::GpuAcc: + return "GpuAcc"; + default: + return "Undefined"; + } +} + +std::uint32_t Profiler::GetEventColor(Compute compute) const +{ + switch(compute) + { + case Compute::CpuRef: + // Cyan + return 0xffff001b; + case Compute::CpuAcc: + // Green + return 0x00ff001b; + case Compute::GpuAcc: + // Purple + return 0xff007f1b; + default: + // Dark gray + return 0x5555551b; + } +} + +} // namespace armnn + +#endif // ARMNN_PROFILING_ENABLED + |