aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/Profiling.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/armnn/Profiling.cpp')
-rw-r--r--src/armnn/Profiling.cpp293
1 files changed, 293 insertions, 0 deletions
diff --git a/src/armnn/Profiling.cpp b/src/armnn/Profiling.cpp
new file mode 100644
index 0000000000..15a195e6bd
--- /dev/null
+++ b/src/armnn/Profiling.cpp
@@ -0,0 +1,293 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "Profiling.hpp"
+
+#if ARMNN_PROFILING_ENABLED
+
+#if ARMNN_STREAMLINE_ENABLED
+#include <streamline_annotate.h>
+#endif
+
+#if ARMCOMPUTECL_ENABLED
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#endif
+
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <stack>
+#include <boost/algorithm/string.hpp>
+
+namespace armnn
+{
+
+// Controls the amount of memory initially allocated to store profiling events.
+// If chosen carefully, the profiling system will not make any additional allocations, thus minimizing its impact on
+// measured times.
+constexpr std::size_t g_ProfilingEventCountHint = 1024;
+
+// Whether profiling reports should include the sequence of events together with their timings.
+constexpr bool g_WriteProfilingEventSequence = true;
+
+// Whether profiling reports should also report detailed information on events grouped by tag.
+// This is used to group stats per inference (see usage of ARMNN_UPDATE_PROFILING_EVENT_TAG in
+// Runtime::EnqueueWorkload). This can spam the output stream, so use carefully (or adapt
+// the code to just output information for a tag of interest).
+constexpr bool g_AggregateProfilingEventsByTag = false;
+
+// Whether a call to Profiler::AnalyzeEventsAndWriteResults() will be made when the Profiler
+// singleton is destroyed. It can be convenient for local tests.
+constexpr bool g_WriteReportToStdOutOnProfilerDestruction = true;
+
+// Whether events denoting operations running on the GPU should force a sync before/after the event.
+// This is hardcoded to true for now as the profiling timings are not very useful without it.
+constexpr bool g_ProfilingForceGpuSync = true;
+
+std::map<std::string, Profiler::ProfilingEventStats> Profiler::CalculateProfilingEventStats() const
+{
+ std::map<std::string, ProfilingEventStats> nameToStatsMap;
+
+ for (auto&& event : m_EventSequence)
+ {
+ auto mapIter = nameToStatsMap.find(event.m_Label);
+ if (mapIter != nameToStatsMap.end())
+ {
+ ProfilingEventStats& stats = mapIter->second;
+ stats.m_TotalMs += event.DurationMs();
+ stats.m_MinMs = std::min(stats.m_MinMs, event.DurationMs());
+ stats.m_MaxMs = std::max(stats.m_MaxMs, event.DurationMs());
+ ++stats.m_Count;
+ }
+ else
+ {
+ ProfilingEventStats stats;
+ stats.m_TotalMs = event.DurationMs();
+ stats.m_MinMs = event.DurationMs();
+ stats.m_MaxMs = event.DurationMs();
+ stats.m_Count = 1;
+
+ nameToStatsMap[event.m_Label] = stats;
+ }
+ }
+
+ return nameToStatsMap;
+}
+
+void Profiler::AnalyzeEventSequenceAndWriteResults(std::vector<ProfilingEvent>::const_iterator first,
+ std::vector<ProfilingEvent>::const_iterator last,
+ std::ostream& outStream) const
+{
+ // Output event sequence, if needed
+ if (g_WriteProfilingEventSequence)
+ {
+ // Make sure timestamps are output with 6 decimals, and save old settings
+ std::streamsize oldPrecision = outStream.precision();
+ outStream.precision(6);
+ std::ios_base::fmtflags oldFlags = outStream.flags();
+ outStream.setf(std::ios::fixed);
+ // Output fields
+ outStream << "Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl;
+ for (auto event = first; event != last; ++event)
+ {
+ std::chrono::duration<double, std::milli> startTimeMs = event->m_StartTime.time_since_epoch();
+ std::chrono::duration<double, std::milli> stopTimeMs = event->m_StopTime.time_since_epoch();
+
+ outStream << std::setw(50) << event->m_Label << " "
+ << std::setw(20) << event->DurationMs()
+ << std::setw(20) << startTimeMs.count()
+ << std::setw(20) << stopTimeMs.count()
+ << std::setw(20) << Profiler::Get().GetEventComputeDevice(event->m_Device)
+ << std::endl;
+ }
+ outStream << std::endl;
+ // Restore previous precision settings
+ outStream.flags(oldFlags);
+ outStream.precision(oldPrecision);
+ }
+
+ // Aggregate results per event name
+ std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats();
+
+ // Output aggregated stats
+ outStream << "Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl;
+ for (const auto& pair : nameToStatsMap)
+ {
+ const std::string& eventLabel = pair.first;
+ const ProfilingEventStats& eventStats = pair.second;
+ const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count);
+
+ outStream << "\t" << std::setw(50) << eventLabel << " " << std::setw(9) << avgMs << " "
+ << std::setw(9) << eventStats.m_MinMs << " " << std::setw(9) << eventStats.m_MaxMs << " "
+ << std::setw(9) << eventStats.m_TotalMs << " " << std::setw(9) << eventStats.m_Count << std::endl;
+ }
+ outStream << std::endl;
+}
+
+Profiler Profiler::s_Instance;
+
+Profiler::Profiler()
+ : m_EventTag(0)
+ , m_NestingLevel(0)
+ , m_EventTagUpdated(false)
+{
+ m_EventSequence.reserve(g_ProfilingEventCountHint);
+
+#if ARMNN_STREAMLINE_ENABLED
+ // Initialise streamline annotations
+ ANNOTATE_SETUP;
+#endif
+}
+
+Profiler::~Profiler()
+{
+ if (g_WriteReportToStdOutOnProfilerDestruction)
+ {
+ AnalyzeEventsAndWriteResults(std::cout);
+ }
+}
+
+void Profiler::BeginEvent(Compute compute, const std::string label)
+{
+ // We need to sync just before the begin event to not include time before the period we want to time.
+ WaitForDevice(compute);
+
+ const TimePoint timeStamp = Clock::now();
+ m_ObservedMarkers.emplace(Marker{m_EventSequence.size(), label, timeStamp, compute, m_EventTag});
+ m_EventSequence.emplace_back();
+
+#if ARMNN_STREAMLINE_ENABLED
+ ANNOTATE_CHANNEL_COLOR(m_NestingLevel, GetEventColor(compute), label.c_str());
+#endif
+
+ m_NestingLevel++;
+}
+
+void Profiler::EndEvent(Compute compute)
+{
+ // We need to sync just before the end event to include all the time of the timed period.
+ WaitForDevice(compute);
+
+ const Marker& marker = m_ObservedMarkers.top();
+
+ const TimePoint startTime = marker.m_TimeStamp;
+ const TimePoint stopTime = Clock::now();
+
+ m_EventSequence[marker.m_Id] = {std::move(marker.m_EventName),
+ startTime,
+ stopTime,
+ marker.m_ComputeDevice,
+ marker.m_Tag};
+
+ m_ObservedMarkers.pop();
+
+#if ARMNN_STREAMLINE_ENABLED
+ ANNOTATE_CHANNEL_END(m_NestingLevel);
+#endif
+
+ m_NestingLevel--;
+}
+
+void Profiler::AnalyzeEventsAndWriteResults(std::ostream& outStream) const
+{
+ // Stack should be empty now.
+ const bool saneMarkerSequence = m_ObservedMarkers.empty();
+
+ // Abort if the sequence of markers was found to have incorrect information:
+ // The stats cannot be trusted.
+ if (!saneMarkerSequence)
+ {
+ outStream << "Cannot write profiling stats. "
+ "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly "
+ "wrong stats. The profiling system may contain implementation issues or could have been used in an "
+ "unsafe manner." << std::endl;
+ return;
+ }
+
+ // Analyze the full sequence of events
+ AnalyzeEventSequenceAndWriteResults(m_EventSequence.begin(), m_EventSequence.end(), outStream);
+
+ // Aggregate events by tag if requested (spams the output stream if done for all tags)
+ if (m_EventTagUpdated && g_AggregateProfilingEventsByTag)
+ {
+ outStream << std::endl;
+ outStream << "***" << std::endl;
+ outStream << "*** Per Tag Stats" << std::endl;
+ outStream << "***" << std::endl;
+ outStream << std::endl;
+
+ for (auto iter = m_EventSequence.begin(); iter != m_EventSequence.end();)
+ {
+ const uint32_t tag = iter->m_Tag;
+
+ // Advance iter until we find the first non-matching tag
+ auto tagEndIter = iter;
+ for (; tagEndIter != m_EventSequence.end(); ++tagEndIter)
+ {
+ if (tagEndIter->m_Tag != tag)
+ {
+ break;
+ }
+ }
+
+ outStream << "> Begin Tag: " << tag << std::endl;
+ outStream << std::endl;
+ AnalyzeEventSequenceAndWriteResults(iter, tagEndIter, outStream);
+ outStream << std::endl;
+ outStream << "> End Tag: " << tag << std::endl;
+
+ iter = tagEndIter;
+ }
+ }
+}
+
+void Profiler::WaitForDevice(Compute compute) const
+{
+#if ARMCOMPUTECL_ENABLED
+ if(compute == Compute::GpuAcc && g_ProfilingForceGpuSync)
+ {
+ arm_compute::CLScheduler::get().sync();
+ }
+#endif
+}
+
+const char* Profiler::GetEventComputeDevice(Compute compute) const
+{
+ switch(compute)
+ {
+ case Compute::CpuRef:
+ return "CpuRef";
+ case Compute::CpuAcc:
+ return "CpuAcc";
+ case Compute::GpuAcc:
+ return "GpuAcc";
+ default:
+ return "Undefined";
+ }
+}
+
+std::uint32_t Profiler::GetEventColor(Compute compute) const
+{
+ switch(compute)
+ {
+ case Compute::CpuRef:
+ // Cyan
+ return 0xffff001b;
+ case Compute::CpuAcc:
+ // Green
+ return 0x00ff001b;
+ case Compute::GpuAcc:
+ // Purple
+ return 0xff007f1b;
+ default:
+ // Dark gray
+ return 0x5555551b;
+ }
+}
+
+} // namespace armnn
+
+#endif // ARMNN_PROFILING_ENABLED
+