ArmNN
 21.02
Profiling.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "Profiling.hpp"
6 
7 #include <armnn/BackendId.hpp>
10 
11 #include "JsonPrinter.hpp"
12 
13 #if ARMNN_STREAMLINE_ENABLED
14 #include <streamline_annotate.h>
15 #endif
16 
17 #include <algorithm>
18 #include <iomanip>
19 #include <iostream>
20 #include <fstream>
21 #include <map>
22 #include <stack>
23 
24 namespace armnn
25 {
26 
27 // Controls the amount of memory initially allocated to store profiling events.
28 // If chosen carefully, the profiling system will not make any additional allocations, thus minimizing its impact on
29 // measured times.
30 constexpr std::size_t g_ProfilingEventCountHint = 1024;
31 
32 // Whether profiling reports should include the sequence of events together with their timings.
33 constexpr bool g_WriteProfilingEventSequence = true;
34 
35 // Whether profiling reports should also report detailed information on events grouped by inference.
36 // This can spam the output stream, so use carefully (or adapt the code to just output information
37 // of interest).
39 
40 // Whether a call to Profiler::AnalyzeEventsAndWriteResults() will be made when the Profiler is destroyed.
41 // It can be convenient for local tests.
43 
44 Measurement FindMeasurement(const std::string& name, const Event* event)
45 {
46 
47  ARMNN_ASSERT(event != nullptr);
48 
49  // Search though the measurements.
50  for (const auto& measurement : event->GetMeasurements())
51  {
52  if (measurement.m_Name == name)
53  {
54  // Measurement found.
55  return measurement;
56  }
57  }
58 
59  // Measurement not found.
60  return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
61 }
62 
63 std::vector<Measurement> FindKernelMeasurements(const Event* event)
64 {
65  ARMNN_ASSERT(event != nullptr);
66 
67  std::vector<Measurement> measurements;
68 
69  // Search through the measurements.
70  for (const auto& measurement : event->GetMeasurements())
71  {
72  if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
73  || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
74  {
75  // Measurement found.
76  measurements.push_back(measurement);
77  }
78  }
79 
80  return measurements;
81 }
82 
83 std::map<std::string, ProfilerImpl::ProfilingEventStats> ProfilerImpl::CalculateProfilingEventStats() const
84 {
85  std::map<std::string, ProfilingEventStats> nameToStatsMap;
86 
87  for (const auto& event : m_EventSequence)
88  {
90 
91  double durationMs = measurement.m_Value;
92  auto it = nameToStatsMap.find(event->GetName());
93  if (it != nameToStatsMap.end())
94  {
95  ProfilingEventStats& stats = it->second;
96  stats.m_TotalMs += durationMs;
97  stats.m_MinMs = std::min(stats.m_MinMs, durationMs);
98  stats.m_MaxMs = std::max(stats.m_MaxMs, durationMs);
99  ++stats.m_Count;
100  }
101  else
102  {
103  nameToStatsMap.emplace(event->GetName(), ProfilingEventStats{ durationMs, durationMs, durationMs, 1 });
104  }
105  }
106 
107  return nameToStatsMap;
108 }
109 
110 const Event* GetEventPtr(const Event* ptr) { return ptr;}
111 const Event* GetEventPtr(const std::unique_ptr<Event>& ptr) {return ptr.get(); }
112 
113 template<typename ItertType>
114 void ProfilerImpl::AnalyzeEventSequenceAndWriteResults(ItertType first, ItertType last, std::ostream& outStream) const
115 {
116  // Outputs event sequence, if needed.
117  if (g_WriteProfilingEventSequence)
118  {
119  // Makes sure timestamps are output with 6 decimals, and save old settings.
120  std::streamsize oldPrecision = outStream.precision();
121  outStream.precision(6);
122  std::ios_base::fmtflags oldFlags = outStream.flags();
123  outStream.setf(std::ios::fixed);
124  // Outputs fields.
125  outStream << "Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl;
126  for (auto event = first; event != last; ++event)
127  {
128  const Event* eventPtr = GetEventPtr((*event));
129  double startTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_START, eventPtr).m_Value;
130  double stopTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_STOP, eventPtr).m_Value;
131 
132  // Find the WallClock measurement if there is one.
133  double durationMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME, eventPtr).m_Value;
134  outStream << std::setw(50) << eventPtr->GetName() << " "
135  << std::setw(20) << durationMs
136  << std::setw(20) << startTimeMs
137  << std::setw(20) << stopTimeMs
138  << std::setw(20) << eventPtr->GetBackendId().Get()
139  << std::endl;
140  }
141  outStream << std::endl;
142  // Restores previous precision settings.
143  outStream.flags(oldFlags);
144  outStream.precision(oldPrecision);
145  }
146 
147  // Aggregates results per event name.
148  std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats();
149 
150  // Outputs aggregated stats.
151  outStream << "Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl;
152  for (const auto& pair : nameToStatsMap)
153  {
154  const std::string& eventLabel = pair.first;
155  const ProfilingEventStats& eventStats = pair.second;
156  const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count);
157 
158  outStream << "\t" << std::setw(50) << eventLabel << " " << std::setw(9) << avgMs << " "
159  << std::setw(9) << eventStats.m_MinMs << " " << std::setw(9) << eventStats.m_MaxMs << " "
160  << std::setw(9) << eventStats.m_TotalMs << " " << std::setw(9) << eventStats.m_Count << std::endl;
161  }
162  outStream << std::endl;
163 }
164 
167 {
168  m_EventSequence.reserve(g_ProfilingEventCountHint);
169 
170 #if ARMNN_STREAMLINE_ENABLED
171  // Initialises streamline annotations.
172  ANNOTATE_SETUP;
173 #endif
174 }
175 
177 {
178  if (m_ProfilingEnabled)
179  {
180  if (g_WriteReportToStdOutOnProfilerDestruction)
181  {
182  Print(std::cout);
183  }
184  }
185 
186  // Un-register this profiler from the current thread.
188 }
189 
191 {
192  return m_ProfilingEnabled;
193 }
194 
195 void ProfilerImpl::EnableProfiling(bool enableProfiling)
196 {
197  m_ProfilingEnabled = enableProfiling;
198 }
199 
201  const BackendId& backendId,
202  const std::string& label,
203  std::vector<InstrumentPtr>&& instruments)
204 {
205  Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
206  m_EventSequence.push_back(std::make_unique<Event>(label, profiler, parent, backendId, std::move(instruments)));
207  Event* event = m_EventSequence.back().get();
208  event->Start();
209 
210 #if ARMNN_STREAMLINE_ENABLED
211  ANNOTATE_CHANNEL_COLOR(uint32_t(m_Parents.size()), GetEventColor(backendId), label.c_str());
212 #endif
213 
214  m_Parents.push(event);
215  return event;
216 }
217 
219 {
220  event->Stop();
221 
222  ARMNN_ASSERT(!m_Parents.empty());
223  ARMNN_ASSERT(event == m_Parents.top());
224  m_Parents.pop();
225 
226  Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
227  IgnoreUnused(parent);
228  ARMNN_ASSERT(event->GetParentEvent() == parent);
229 
230 #if ARMNN_STREAMLINE_ENABLED
231  ANNOTATE_CHANNEL_END(uint32_t(m_Parents.size()));
232 #endif
233 }
234 
235 int CalcLevel(const Event* eventPtr)
236 {
237  int level=0;
238  while (eventPtr != nullptr)
239  {
240  eventPtr = eventPtr->GetParentEvent();
241  level++;
242  }
243  return level;
244 }
245 
246 void ProfilerImpl::PopulateInferences(std::vector<const Event*>& outInferences, int& outBaseLevel) const
247 {
248  outInferences.reserve(m_EventSequence.size());
249  for (const auto& event : m_EventSequence)
250  {
251  const Event* eventPtrRaw = event.get();
252  if (eventPtrRaw->GetName() == "EnqueueWorkload")
253  {
254  outBaseLevel = (outBaseLevel == -1) ? CalcLevel(eventPtrRaw) : outBaseLevel;
255  outInferences.push_back(eventPtrRaw);
256  }
257  }
258 }
259 
260 void ProfilerImpl::PopulateDescendants(std::map<const Event*, std::vector<const Event*>>& outDescendantsMap) const
261 {
262  for (const auto& event : m_EventSequence)
263  {
264  const Event* eventPtrRaw = event.get();
265  const Event* parent = eventPtrRaw->GetParentEvent();
266 
267  if (!parent)
268  {
269  continue;
270  }
271 
272  auto it = outDescendantsMap.find(parent);
273  if (it == outDescendantsMap.end())
274  {
275  outDescendantsMap.emplace(parent, std::vector<const Event*>({eventPtrRaw}));
276  }
277  else
278  {
279  it->second.push_back(eventPtrRaw);
280  }
281  }
282 }
283 
284 
285 void ExtractJsonObjects(unsigned int inferenceIndex,
286  const Event* parentEvent,
287  JsonChildObject& parentObject,
288  std::map<const Event*, std::vector<const Event*>> descendantsMap)
289 {
290  ARMNN_ASSERT(parentEvent);
291  std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
292  unsigned int childIdx=0;
293  for(size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
294  {
295  if (inferenceIndex == 0)
296  {
297  // Only add kernel measurement once, in case of multiple inferences
298  JsonChildObject measurementObject{instrumentMeasurements[measurementIndex].m_Name};
299  measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
300  measurementObject.SetType(JsonObjectType::Measurement);
301 
302  ARMNN_ASSERT(parentObject.NumChildren() == childIdx);
303  parentObject.AddChild(measurementObject);
304  }
305 
306  parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
307  }
308 
309 
310  auto childEventsIt = descendantsMap.find(parentEvent);
311  if (childEventsIt != descendantsMap.end())
312  {
313  for (auto childEvent : childEventsIt->second)
314  {
315  if (inferenceIndex == 0)
316  {
317  // Only add second level once, in case of multiple inferences
318  JsonChildObject childObject{childEvent->GetName()};
319  childObject.SetType(JsonObjectType::Event);
320  parentObject.AddChild(childObject);
321  }
322 
323  // Recursively process children. In reality this won't be very deep recursion. ~4-6 levels deep.
324  ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
325 
326  childIdx++;
327  }
328  }
329 }
330 
331 void ProfilerImpl::Print(std::ostream& outStream) const
332 {
333  // Makes sure timestamps are output with 6 decimals, and save old settings.
334  std::streamsize oldPrecision = outStream.precision();
335  outStream.precision(6);
336  std::ios_base::fmtflags oldFlags = outStream.flags();
337  outStream.setf(std::ios::fixed);
338  JsonPrinter printer(outStream);
339 
340  // First find all the "inference" Events and print out duration measurements.
341  int baseLevel = -1;
342  std::vector<const Event*> inferences;
343  PopulateInferences(inferences, baseLevel);
344 
345  // Second map out descendants hierarchy
346  std::map<const Event*, std::vector<const Event*>> descendantsMap;
347  PopulateDescendants(descendantsMap);
348 
349  JsonChildObject inferenceObject{"inference_measurements"};
350  JsonChildObject layerObject{"layer_measurements"};
351  std::vector<JsonChildObject> workloadObjects;
352  std::map<unsigned int, std::vector<JsonChildObject>> workloadToKernelObjects;
353 
354  for (unsigned int inferenceIndex = 0; inferenceIndex < inferences.size(); ++inferenceIndex)
355  {
356  auto inference = inferences[inferenceIndex];
357  ExtractJsonObjects(inferenceIndex, inference, inferenceObject, descendantsMap);
358  }
359 
360  printer.PrintHeader();
361  printer.PrintArmNNHeader();
362 
363  // print inference object, also prints child layer and kernel measurements
364  size_t id=0;
365  printer.PrintJsonChildObject(inferenceObject, id);
366 
367  // end of ArmNN
368  printer.PrintNewLine();
369  printer.PrintFooter();
370 
371  // end of main JSON object
372  printer.PrintNewLine();
373  printer.PrintFooter();
374  printer.PrintNewLine();
375 
376  // Restores previous precision settings.
377  outStream.flags(oldFlags);
378  outStream.precision(oldPrecision);
379 }
380 
381 void ProfilerImpl::AnalyzeEventsAndWriteResults(std::ostream& outStream) const
382 {
383  // Stack should be empty now.
384  const bool saneMarkerSequence = m_Parents.empty();
385 
386  // Abort if the sequence of markers was found to have incorrect information:
387  // The stats cannot be trusted.
388  if (!saneMarkerSequence)
389  {
390  outStream << "Cannot write profiling stats. "
391  "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly "
392  "wrong stats. The profiling system may contain implementation issues or could have been used in an "
393  "unsafe manner." << std::endl;
394  return;
395  }
396 
397  // Analyzes the full sequence of events.
399  m_EventSequence.cend(),
400  outStream);
401 
402  // Aggregates events by tag if requested (spams the output stream if done for all tags).
403  if (g_AggregateProfilingEventsByInference)
404  {
405  outStream << std::endl;
406  outStream << "***" << std::endl;
407  outStream << "*** Per Inference Stats" << std::endl;
408  outStream << "***" << std::endl;
409  outStream << std::endl;
410 
411  int baseLevel = -1;
412  std::vector<const Event*> inferences;
413  PopulateInferences(inferences, baseLevel);
414 
415  // Second map out descendants hierarchy
416  std::map<const Event*, std::vector<const Event*>> descendantsMap;
417  PopulateDescendants(descendantsMap);
418 
419  std::function<void (const Event*, std::vector<const Event*>&)>
420  FindDescendantEvents = [&](const Event* eventPtr,
421  std::vector<const Event*>& sequence)
422  {
423  sequence.push_back(eventPtr);
424 
425  if (CalcLevel(eventPtr) > baseLevel+2) //We only care about levels as deep as workload executions.
426  {
427  return;
428  }
429 
430  auto children = descendantsMap.find(eventPtr);
431  if (children == descendantsMap.end())
432  {
433  return;
434  }
435 
436  if (!(children->second.empty()))
437  {
438  return FindDescendantEvents(children->second[0], sequence);
439  }
440  };
441 
442  // Third, find events belonging to each inference
443  int inferenceIdx = 0;
444  for (auto inference : inferences)
445  {
446  std::vector<const Event*> sequence;
447 
448  //build sequence, depth first
449  FindDescendantEvents(inference, sequence);
450 
451  outStream << "> Begin Inference: " << inferenceIdx << std::endl;
452  outStream << std::endl;
453  AnalyzeEventSequenceAndWriteResults(sequence.cbegin(),
454  sequence.cend(),
455  outStream);
456  outStream << std::endl;
457  outStream << "> End Inference: " << inferenceIdx << std::endl;
458 
459  inferenceIdx++;
460  }
461  }
462 }
463 
464 std::uint32_t ProfilerImpl::GetEventColor(const BackendId& backendId) const
465 {
466  static BackendId cpuRef("CpuRef");
467  static BackendId cpuAcc("CpuAcc");
468  static BackendId gpuAcc("GpuAcc");
469  if (backendId == cpuRef) {
470  // Cyan
471  return 0xffff001b;
472  } else if (backendId == cpuAcc) {
473  // Green
474  return 0x00ff001b;
475  } else if (backendId == gpuAcc) {
476  // Purple
477  return 0xff007f1b;
478  } else {
479  // Dark gray
480  return 0x5555551b;
481  }
482 }
483 
484 // The thread_local pointer to the profiler instance.
485 thread_local IProfiler* tl_Profiler = nullptr;
486 
487 
488 
490 {
491  // Global reference to the single ProfileManager instance allowed.
492  static ProfilerManager s_ProfilerManager;
493  return s_ProfilerManager;
494 }
495 
497 {
498  tl_Profiler = profiler;
499 }
500 
502 {
503  return tl_Profiler;
504 }
505 
506 
507 void IProfiler::EnableProfiling(bool enableProfiling)
508 {
509  pProfilerImpl->EnableProfiling(enableProfiling);
510 }
511 
513 {
514  return pProfilerImpl->IsProfilingEnabled();
515 }
516 
517 void IProfiler::AnalyzeEventsAndWriteResults(std::ostream& outStream) const
518 {
519  pProfilerImpl->AnalyzeEventsAndWriteResults(outStream);
520 }
521 
522 void IProfiler::Print(std::ostream& outStream) const
523 {
524  pProfilerImpl->Print(outStream);
525 }
526 
527 Event* IProfiler::BeginEvent(const BackendId& backendId,
528  const std::string& label,
529  std::vector<InstrumentPtr>&& instruments)
530 {
531  return pProfilerImpl->BeginEvent(this, backendId, label, std::move(instruments));
532 }
533 
534 IProfiler::~IProfiler() = default;
535 IProfiler::IProfiler() : pProfilerImpl(new ProfilerImpl()) {};
536 
537 } // namespace armnn
JsonChildObject & GetChild(const unsigned int index)
Definition: JsonPrinter.hpp:41
Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Ev...
const Event * GetEventPtr(const Event *ptr)
Definition: Profiling.cpp:110
constexpr bool g_WriteReportToStdOutOnProfilerDestruction
Definition: Profiling.cpp:42
void AddChild(const JsonChildObject &childObject)
Definition: JsonPrinter.hpp:36
int CalcLevel(const Event *eventPtr)
Definition: Profiling.cpp:235
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:489
Measurement FindMeasurement(const std::string &name, const Event *event)
Definition: Profiling.cpp:44
DataLayout::NCHW false
uint32_t GetEventColor(const BackendId &backendId) const
Definition: Profiling.cpp:464
std::vector< Measurement > FindKernelMeasurements(const Event *event)
Definition: Profiling.cpp:63
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
Definition: Profiling.cpp:522
std::map< std::string, ProfilingEventStats > CalculateProfilingEventStats() const
Definition: Profiling.cpp:83
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const
Analyzes the tracked events and writes the results to the given output stream.
Definition: Profiling.cpp:517
const std::string & GetName() const
Get the name of the event.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
void AnalyzeEventSequenceAndWriteResults(EventIterType first, EventIterType last, std::ostream &outStream) const
size_t NumChildren() const
Definition: JsonPrinter.hpp:51
IProfiler * GetProfiler()
Definition: Profiling.cpp:501
BackendId GetBackendId() const
Get the backend id of the event.
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const
Definition: Profiling.cpp:381
const std::vector< Measurement > GetMeasurements() const
Get the recorded measurements calculated between Start() and Stop()
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
Definition: Profiling.cpp:285
void EnableProfiling(bool enableProfiling)
Enables/disables profiling for this profiler.
Definition: Profiling.cpp:507
static const std::string WALL_CLOCK_TIME_STOP
void SetType(JsonObjectType type)
Definition: JsonPrinter.hpp:56
static const std::string WALL_CLOCK_TIME_START
constexpr std::size_t g_ProfilingEventCountHint
Definition: Profiling.cpp:30
static const std::string WALL_CLOCK_TIME
bool IsProfilingEnabled()
Checks whether profiling is enabled.
Definition: Profiling.cpp:512
void AddMeasurement(const double measurement)
Definition: JsonPrinter.hpp:31
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void EndEvent(Event *event)
Definition: Profiling.cpp:218
void PrintJsonChildObject(const JsonChildObject &object, size_t &id)
Definition: JsonPrinter.cpp:15
constexpr bool g_WriteProfilingEventSequence
Definition: Profiling.cpp:33
constexpr bool g_AggregateProfilingEventsByInference
Definition: Profiling.cpp:38
void Print(std::ostream &outStream) const
Definition: Profiling.cpp:331
Event * BeginEvent(armnn::IProfiler *profiler, const BackendId &backendId, const std::string &name, std::vector< InstrumentPtr > &&instruments)
Definition: Profiling.cpp:200
void RegisterProfiler(IProfiler *profiler)
Definition: Profiling.cpp:496
const std::string & Get() const
Definition: BackendId.hpp:136
void SetUnit(const Measurement::Unit unit)
Definition: JsonPrinter.hpp:46
void PopulateInferences(std::vector< const Event *> &outInferences, int &outBaseLevel) const
Definition: Profiling.cpp:246
std::stack< Event * > m_Parents
Definition: Profiling.hpp:84
const Event * GetParentEvent() const
Get the pointer of the parent event.
thread_local IProfiler * tl_Profiler
Definition: Profiling.cpp:485
void PopulateDescendants(std::map< const Event *, std::vector< const Event *>> &outDescendantsMap) const
Definition: Profiling.cpp:260
std::vector< EventPtr > m_EventSequence
Definition: Profiling.hpp:85
void EnableProfiling(bool enableProfiling)
Definition: Profiling.cpp:195