11 #if ARMNN_STREAMLINE_ENABLED 12 #include <streamline_annotate.h> 22 #include <boost/algorithm/string.hpp> 23 #include <boost/core/ignore_unused.hpp> 47 BOOST_ASSERT(event !=
nullptr);
52 if (measurement.m_Name == name)
60 return Measurement{
"", 0.f, Measurement::Unit::TIME_MS };
65 BOOST_ASSERT(event !=
nullptr);
67 std::vector<Measurement> measurements;
72 if (measurement.m_Name.rfind(
"OpenClKernelTimer", 0) == 0
73 || measurement.m_Name.rfind(
"NeonKernelTimer", 0) == 0)
76 measurements.push_back(measurement);
83 std::map<std::string, Profiler::ProfilingEventStats> Profiler::CalculateProfilingEventStats()
const 85 std::map<std::string, ProfilingEventStats> nameToStatsMap;
87 for (
const auto& event : m_EventSequence)
91 double durationMs = measurement.
m_Value;
92 auto it = nameToStatsMap.find(event->GetName());
93 if (it != nameToStatsMap.end())
95 ProfilingEventStats& stats = it->second;
96 stats.m_TotalMs += durationMs;
97 stats.m_MinMs = std::min(stats.m_MinMs, durationMs);
98 stats.m_MaxMs = std::max(stats.m_MaxMs, durationMs);
103 nameToStatsMap.emplace(event->GetName(), ProfilingEventStats{ durationMs, durationMs, durationMs, 1 });
107 return nameToStatsMap;
113 template<
typename ItertType>
114 void Profiler::AnalyzeEventSequenceAndWriteResults(ItertType first, ItertType last, std::ostream& outStream)
const 117 if (g_WriteProfilingEventSequence)
120 std::streamsize oldPrecision = outStream.precision();
121 outStream.precision(6);
122 std::ios_base::fmtflags oldFlags = outStream.flags();
123 outStream.setf(std::ios::fixed);
125 outStream <<
"Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl;
126 for (
auto event = first;
event != last; ++event)
134 outStream << std::setw(50) << eventPtr->
GetName() <<
" " 135 << std::setw(20) << durationMs
136 << std::setw(20) << startTimeMs
137 << std::setw(20) << stopTimeMs
141 outStream << std::endl;
143 outStream.flags(oldFlags);
144 outStream.precision(oldPrecision);
148 std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats();
151 outStream <<
"Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl;
152 for (
const auto& pair : nameToStatsMap)
154 const std::string& eventLabel = pair.first;
155 const ProfilingEventStats& eventStats = pair.second;
156 const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count);
158 outStream <<
"\t" << std::setw(50) << eventLabel <<
" " << std::setw(9) << avgMs <<
" " 159 << std::setw(9) << eventStats.m_MinMs <<
" " << std::setw(9) << eventStats.m_MaxMs <<
" " 160 << std::setw(9) << eventStats.m_TotalMs <<
" " << std::setw(9) << eventStats.m_Count << std::endl;
162 outStream << std::endl;
166 : m_ProfilingEnabled(
false)
168 m_EventSequence.reserve(g_ProfilingEventCountHint);
170 #if ARMNN_STREAMLINE_ENABLED 178 if (m_ProfilingEnabled)
180 if (g_WriteReportToStdOutOnProfilerDestruction)
192 return m_ProfilingEnabled;
197 m_ProfilingEnabled = enableProfiling;
201 const std::string& label,
202 std::vector<InstrumentPtr>&& instruments)
204 Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
205 m_EventSequence.push_back(std::make_unique<Event>(label,
this, parent, backendId, std::move(instruments)));
206 Event*
event = m_EventSequence.back().get();
209 #if ARMNN_STREAMLINE_ENABLED 210 ANNOTATE_CHANNEL_COLOR(uint32_t(m_Parents.size()),
GetEventColor(backendId), label.c_str());
213 m_Parents.push(event);
221 BOOST_ASSERT(!m_Parents.empty());
222 BOOST_ASSERT(event == m_Parents.top());
225 Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
226 boost::ignore_unused(parent);
229 #if ARMNN_STREAMLINE_ENABLED 230 ANNOTATE_CHANNEL_END(uint32_t(m_Parents.size()));
237 while (eventPtr !=
nullptr)
245 void Profiler::PopulateInferences(std::vector<const Event*>& outInferences,
int& outBaseLevel)
const 247 outInferences.reserve(m_EventSequence.size());
248 for (
const auto& event : m_EventSequence)
250 const Event* eventPtrRaw =
event.get();
251 if (eventPtrRaw->
GetName() ==
"EnqueueWorkload")
253 outBaseLevel = (outBaseLevel == -1) ?
CalcLevel(eventPtrRaw) : outBaseLevel;
254 outInferences.push_back(eventPtrRaw);
259 void Profiler::PopulateDescendants(std::map<
const Event*, std::vector<const Event*>>& outDescendantsMap)
const 261 for (
const auto& event : m_EventSequence)
263 const Event* eventPtrRaw =
event.get();
271 auto it = outDescendantsMap.find(parent);
272 if (it == outDescendantsMap.end())
274 outDescendantsMap.emplace(parent, std::vector<const Event*>({eventPtrRaw}));
278 it->second.push_back(eventPtrRaw);
285 const Event* parentEvent,
287 std::map<
const Event*, std::vector<const Event*>> descendantsMap)
289 BOOST_ASSERT(parentEvent);
290 std::vector<Measurement> instrumentMeasurements = parentEvent->
GetMeasurements();
291 unsigned int childIdx=0;
292 for(
size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
294 if (inferenceIndex == 0)
297 JsonChildObject measurementObject{instrumentMeasurements[measurementIndex].m_Name};
298 measurementObject.
SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
301 BOOST_ASSERT(parentObject.
NumChildren() == childIdx);
302 parentObject.
AddChild(measurementObject);
309 auto childEventsIt = descendantsMap.find(parentEvent);
310 if (childEventsIt != descendantsMap.end())
312 for (
auto childEvent : childEventsIt->second)
314 if (inferenceIndex == 0)
333 std::streamsize oldPrecision = outStream.precision();
334 outStream.precision(6);
335 std::ios_base::fmtflags oldFlags = outStream.flags();
336 outStream.setf(std::ios::fixed);
341 std::vector<const Event*> inferences;
342 PopulateInferences(inferences, baseLevel);
345 std::map<const Event*, std::vector<const Event*>> descendantsMap;
346 PopulateDescendants(descendantsMap);
350 std::vector<JsonChildObject> workloadObjects;
351 std::map<unsigned int, std::vector<JsonChildObject>> workloadToKernelObjects;
353 for (
unsigned int inferenceIndex = 0; inferenceIndex < inferences.size(); ++inferenceIndex)
355 auto inference = inferences[inferenceIndex];
376 outStream.flags(oldFlags);
377 outStream.precision(oldPrecision);
383 const bool saneMarkerSequence = m_Parents.empty();
387 if (!saneMarkerSequence)
389 outStream <<
"Cannot write profiling stats. " 390 "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly " 391 "wrong stats. The profiling system may contain implementation issues or could have been used in an " 392 "unsafe manner." << std::endl;
397 AnalyzeEventSequenceAndWriteResults(m_EventSequence.cbegin(),
398 m_EventSequence.cend(),
402 if (g_AggregateProfilingEventsByInference)
404 outStream << std::endl;
405 outStream <<
"***" << std::endl;
406 outStream <<
"*** Per Inference Stats" << std::endl;
407 outStream <<
"***" << std::endl;
408 outStream << std::endl;
411 std::vector<const Event*> inferences;
412 PopulateInferences(inferences, baseLevel);
415 std::map<const Event*, std::vector<const Event*>> descendantsMap;
416 PopulateDescendants(descendantsMap);
418 std::function<void (const Event*, std::vector<const Event*>&)>
419 FindDescendantEvents = [&](
const Event* eventPtr,
420 std::vector<const Event*>& sequence)
422 sequence.push_back(eventPtr);
429 auto children = descendantsMap.find(eventPtr);
430 if (children == descendantsMap.end())
435 for (
const Event* child : children->second)
437 return FindDescendantEvents(child, sequence);
442 int inferenceIdx = 0;
443 for (
auto inference : inferences)
445 std::vector<const Event*> sequence;
448 FindDescendantEvents(inference, sequence);
450 outStream <<
"> Begin Inference: " << inferenceIdx << std::endl;
451 outStream << std::endl;
452 AnalyzeEventSequenceAndWriteResults(sequence.cbegin(),
455 outStream << std::endl;
456 outStream <<
"> End Inference: " << inferenceIdx << std::endl;
468 if (backendId == cpuRef) {
471 }
else if (backendId == cpuAcc) {
474 }
else if (backendId == gpuAcc) {
490 return s_ProfilerManager;
495 tl_Profiler = profiler;
static const std::string WALL_CLOCK_TIME_START
int CalcLevel(const Event *eventPtr)
constexpr std::size_t g_ProfilingEventCountHint
void AddChild(const JsonChildObject &childObject)
Event * BeginEvent(const BackendId &backendId, const std::string &name, std::vector< InstrumentPtr > &&instruments)
void AddMeasurement(const double measurement)
constexpr bool g_WriteReportToStdOutOnProfilerDestruction
void SetUnit(const Measurement::Unit unit)
static const std::string WALL_CLOCK_TIME
thread_local Profiler * tl_Profiler
void EndEvent(Event *event)
void PrintJsonChildObject(const JsonChildObject &object, size_t &id)
BackendId GetBackendId() const
const Event * GetParentEvent() const
JsonChildObject & GetChild(const unsigned int index)
uint32_t GetEventColor(const BackendId &backendId) const
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
constexpr bool g_WriteProfilingEventSequence
void EnableProfiling(bool enableProfiling) override
constexpr bool g_AggregateProfilingEventsByInference
void SetType(JsonObjectType type)
const Event * GetEventPtr(const Event *ptr)
size_t NumChildren() const
Measurement FindMeasurement(const std::string &name, const Event *event)
const std::string & GetName() const
void Print(std::ostream &outStream) const override
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const override
void RegisterProfiler(Profiler *profiler)
const std::vector< Measurement > GetMeasurements() const
std::vector< Measurement > FindKernelMeasurements(const Event *event)
static const std::string WALL_CLOCK_TIME_STOP
static ProfilerManager & GetInstance()
bool IsProfilingEnabled() override
const std::string & Get() const