ArmNN
 20.02
Profiling.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "Profiling.hpp"
6 
7 #include <armnn/BackendId.hpp>
9 
10 #include "JsonPrinter.hpp"
11 
12 #if ARMNN_STREAMLINE_ENABLED
13 #include <streamline_annotate.h>
14 #endif
15 
16 #include <algorithm>
17 #include <iomanip>
18 #include <iostream>
19 #include <fstream>
20 #include <map>
21 #include <stack>
22 
23 #include <boost/algorithm/string.hpp>
24 
25 namespace armnn
26 {
27 
28 // Controls the amount of memory initially allocated to store profiling events.
29 // If chosen carefully, the profiling system will not make any additional allocations, thus minimizing its impact on
30 // measured times.
31 constexpr std::size_t g_ProfilingEventCountHint = 1024;
32 
33 // Whether profiling reports should include the sequence of events together with their timings.
34 constexpr bool g_WriteProfilingEventSequence = true;
35 
36 // Whether profiling reports should also report detailed information on events grouped by inference.
37 // This can spam the output stream, so use carefully (or adapt the code to just output information
38 // of interest).
40 
41 // Whether a call to Profiler::AnalyzeEventsAndWriteResults() will be made when the Profiler is destroyed.
42 // It can be convenient for local tests.
44 
45 Measurement FindMeasurement(const std::string& name, const Event* event)
46 {
47 
48  BOOST_ASSERT(event != nullptr);
49 
50  // Search though the measurements.
51  for (const auto& measurement : event->GetMeasurements())
52  {
53  if (measurement.m_Name == name)
54  {
55  // Measurement found.
56  return measurement;
57  }
58  }
59 
60  // Measurement not found.
61  return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
62 }
63 
64 std::vector<Measurement> FindKernelMeasurements(const Event* event)
65 {
66  BOOST_ASSERT(event != nullptr);
67 
68  std::vector<Measurement> measurements;
69 
70  // Search through the measurements.
71  for (const auto& measurement : event->GetMeasurements())
72  {
73  if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
74  || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
75  {
76  // Measurement found.
77  measurements.push_back(measurement);
78  }
79  }
80 
81  return measurements;
82 }
83 
84 std::map<std::string, Profiler::ProfilingEventStats> Profiler::CalculateProfilingEventStats() const
85 {
86  std::map<std::string, ProfilingEventStats> nameToStatsMap;
87 
88  for (const auto& event : m_EventSequence)
89  {
91 
92  double durationMs = measurement.m_Value;
93  auto it = nameToStatsMap.find(event->GetName());
94  if (it != nameToStatsMap.end())
95  {
96  ProfilingEventStats& stats = it->second;
97  stats.m_TotalMs += durationMs;
98  stats.m_MinMs = std::min(stats.m_MinMs, durationMs);
99  stats.m_MaxMs = std::max(stats.m_MaxMs, durationMs);
100  ++stats.m_Count;
101  }
102  else
103  {
104  nameToStatsMap.emplace(event->GetName(), ProfilingEventStats{ durationMs, durationMs, durationMs, 1 });
105  }
106  }
107 
108  return nameToStatsMap;
109 }
110 
111 const Event* GetEventPtr(const Event* ptr) { return ptr;}
112 const Event* GetEventPtr(const std::unique_ptr<Event>& ptr) {return ptr.get(); }
113 
114 template<typename ItertType>
115 void Profiler::AnalyzeEventSequenceAndWriteResults(ItertType first, ItertType last, std::ostream& outStream) const
116 {
117  // Outputs event sequence, if needed.
118  if (g_WriteProfilingEventSequence)
119  {
120  // Makes sure timestamps are output with 6 decimals, and save old settings.
121  std::streamsize oldPrecision = outStream.precision();
122  outStream.precision(6);
123  std::ios_base::fmtflags oldFlags = outStream.flags();
124  outStream.setf(std::ios::fixed);
125  // Outputs fields.
126  outStream << "Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl;
127  for (auto event = first; event != last; ++event)
128  {
129  const Event* eventPtr = GetEventPtr((*event));
130  double startTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_START, eventPtr).m_Value;
131  double stopTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_STOP, eventPtr).m_Value;
132 
133  // Find the WallClock measurement if there is one.
134  double durationMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME, eventPtr).m_Value;
135  outStream << std::setw(50) << eventPtr->GetName() << " "
136  << std::setw(20) << durationMs
137  << std::setw(20) << startTimeMs
138  << std::setw(20) << stopTimeMs
139  << std::setw(20) << eventPtr->GetBackendId().Get()
140  << std::endl;
141  }
142  outStream << std::endl;
143  // Restores previous precision settings.
144  outStream.flags(oldFlags);
145  outStream.precision(oldPrecision);
146  }
147 
148  // Aggregates results per event name.
149  std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats();
150 
151  // Outputs aggregated stats.
152  outStream << "Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl;
153  for (const auto& pair : nameToStatsMap)
154  {
155  const std::string& eventLabel = pair.first;
156  const ProfilingEventStats& eventStats = pair.second;
157  const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count);
158 
159  outStream << "\t" << std::setw(50) << eventLabel << " " << std::setw(9) << avgMs << " "
160  << std::setw(9) << eventStats.m_MinMs << " " << std::setw(9) << eventStats.m_MaxMs << " "
161  << std::setw(9) << eventStats.m_TotalMs << " " << std::setw(9) << eventStats.m_Count << std::endl;
162  }
163  outStream << std::endl;
164 }
165 
167  : m_ProfilingEnabled(false)
168 {
169  m_EventSequence.reserve(g_ProfilingEventCountHint);
170 
171 #if ARMNN_STREAMLINE_ENABLED
172  // Initialises streamline annotations.
173  ANNOTATE_SETUP;
174 #endif
175 }
176 
178 {
179  if (m_ProfilingEnabled)
180  {
181  if (g_WriteReportToStdOutOnProfilerDestruction)
182  {
183  Print(std::cout);
184  }
185  }
186 
187  // Un-register this profiler from the current thread.
189 }
190 
192 {
193  return m_ProfilingEnabled;
194 }
195 
196 void Profiler::EnableProfiling(bool enableProfiling)
197 {
198  m_ProfilingEnabled = enableProfiling;
199 }
200 
202  const std::string& label,
203  std::vector<InstrumentPtr>&& instruments)
204 {
205  Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
206  m_EventSequence.push_back(std::make_unique<Event>(label, this, parent, backendId, std::move(instruments)));
207  Event* event = m_EventSequence.back().get();
208  event->Start();
209 
210 #if ARMNN_STREAMLINE_ENABLED
211  ANNOTATE_CHANNEL_COLOR(uint32_t(m_Parents.size()), GetEventColor(backendId), label.c_str());
212 #endif
213 
214  m_Parents.push(event);
215  return event;
216 }
217 
219 {
220  event->Stop();
221 
222  BOOST_ASSERT(!m_Parents.empty());
223  BOOST_ASSERT(event == m_Parents.top());
224  m_Parents.pop();
225 
226  Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
227  IgnoreUnused(parent);
228  BOOST_ASSERT(event->GetParentEvent() == parent);
229 
230 #if ARMNN_STREAMLINE_ENABLED
231  ANNOTATE_CHANNEL_END(uint32_t(m_Parents.size()));
232 #endif
233 }
234 
235 int CalcLevel(const Event* eventPtr)
236 {
237  int level=0;
238  while (eventPtr != nullptr)
239  {
240  eventPtr = eventPtr->GetParentEvent();
241  level++;
242  }
243  return level;
244 }
245 
246 void Profiler::PopulateInferences(std::vector<const Event*>& outInferences, int& outBaseLevel) const
247 {
248  outInferences.reserve(m_EventSequence.size());
249  for (const auto& event : m_EventSequence)
250  {
251  const Event* eventPtrRaw = event.get();
252  if (eventPtrRaw->GetName() == "EnqueueWorkload")
253  {
254  outBaseLevel = (outBaseLevel == -1) ? CalcLevel(eventPtrRaw) : outBaseLevel;
255  outInferences.push_back(eventPtrRaw);
256  }
257  }
258 }
259 
260 void Profiler::PopulateDescendants(std::map<const Event*, std::vector<const Event*>>& outDescendantsMap) const
261 {
262  for (const auto& event : m_EventSequence)
263  {
264  const Event* eventPtrRaw = event.get();
265  const Event* parent = eventPtrRaw->GetParentEvent();
266 
267  if (!parent)
268  {
269  continue;
270  }
271 
272  auto it = outDescendantsMap.find(parent);
273  if (it == outDescendantsMap.end())
274  {
275  outDescendantsMap.emplace(parent, std::vector<const Event*>({eventPtrRaw}));
276  }
277  else
278  {
279  it->second.push_back(eventPtrRaw);
280  }
281  }
282 }
283 
284 
285 void ExtractJsonObjects(unsigned int inferenceIndex,
286  const Event* parentEvent,
287  JsonChildObject& parentObject,
288  std::map<const Event*, std::vector<const Event*>> descendantsMap)
289 {
290  BOOST_ASSERT(parentEvent);
291  std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
292  unsigned int childIdx=0;
293  for(size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
294  {
295  if (inferenceIndex == 0)
296  {
297  // Only add kernel measurement once, in case of multiple inferences
298  JsonChildObject measurementObject{instrumentMeasurements[measurementIndex].m_Name};
299  measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
300  measurementObject.SetType(JsonObjectType::Measurement);
301 
302  BOOST_ASSERT(parentObject.NumChildren() == childIdx);
303  parentObject.AddChild(measurementObject);
304  }
305 
306  parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
307  }
308 
309 
310  auto childEventsIt = descendantsMap.find(parentEvent);
311  if (childEventsIt != descendantsMap.end())
312  {
313  for (auto childEvent : childEventsIt->second)
314  {
315  if (inferenceIndex == 0)
316  {
317  // Only add second level once, in case of multiple inferences
318  JsonChildObject childObject{childEvent->GetName()};
319  childObject.SetType(JsonObjectType::Event);
320  parentObject.AddChild(childObject);
321  }
322 
323  // Recursively process children. In reality this won't be very deep recursion. ~4-6 levels deep.
324  ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
325 
326  childIdx++;
327  }
328  }
329 }
330 
331 void Profiler::Print(std::ostream& outStream) const
332 {
333  // Makes sure timestamps are output with 6 decimals, and save old settings.
334  std::streamsize oldPrecision = outStream.precision();
335  outStream.precision(6);
336  std::ios_base::fmtflags oldFlags = outStream.flags();
337  outStream.setf(std::ios::fixed);
338  JsonPrinter printer(outStream);
339 
340  // First find all the "inference" Events and print out duration measurements.
341  int baseLevel = -1;
342  std::vector<const Event*> inferences;
343  PopulateInferences(inferences, baseLevel);
344 
345  // Second map out descendants hierarchy
346  std::map<const Event*, std::vector<const Event*>> descendantsMap;
347  PopulateDescendants(descendantsMap);
348 
349  JsonChildObject inferenceObject{"inference_measurements"};
350  JsonChildObject layerObject{"layer_measurements"};
351  std::vector<JsonChildObject> workloadObjects;
352  std::map<unsigned int, std::vector<JsonChildObject>> workloadToKernelObjects;
353 
354  for (unsigned int inferenceIndex = 0; inferenceIndex < inferences.size(); ++inferenceIndex)
355  {
356  auto inference = inferences[inferenceIndex];
357  ExtractJsonObjects(inferenceIndex, inference, inferenceObject, descendantsMap);
358  }
359 
360  printer.PrintHeader();
361  printer.PrintArmNNHeader();
362 
363  // print inference object, also prints child layer and kernel measurements
364  size_t id=0;
365  printer.PrintJsonChildObject(inferenceObject, id);
366 
367  // end of ArmNN
368  printer.PrintNewLine();
369  printer.PrintFooter();
370 
371  // end of main JSON object
372  printer.PrintNewLine();
373  printer.PrintFooter();
374  printer.PrintNewLine();
375 
376  // Restores previous precision settings.
377  outStream.flags(oldFlags);
378  outStream.precision(oldPrecision);
379 }
380 
381 void Profiler::AnalyzeEventsAndWriteResults(std::ostream& outStream) const
382 {
383  // Stack should be empty now.
384  const bool saneMarkerSequence = m_Parents.empty();
385 
386  // Abort if the sequence of markers was found to have incorrect information:
387  // The stats cannot be trusted.
388  if (!saneMarkerSequence)
389  {
390  outStream << "Cannot write profiling stats. "
391  "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly "
392  "wrong stats. The profiling system may contain implementation issues or could have been used in an "
393  "unsafe manner." << std::endl;
394  return;
395  }
396 
397  // Analyzes the full sequence of events.
398  AnalyzeEventSequenceAndWriteResults(m_EventSequence.cbegin(),
399  m_EventSequence.cend(),
400  outStream);
401 
402  // Aggregates events by tag if requested (spams the output stream if done for all tags).
403  if (g_AggregateProfilingEventsByInference)
404  {
405  outStream << std::endl;
406  outStream << "***" << std::endl;
407  outStream << "*** Per Inference Stats" << std::endl;
408  outStream << "***" << std::endl;
409  outStream << std::endl;
410 
411  int baseLevel = -1;
412  std::vector<const Event*> inferences;
413  PopulateInferences(inferences, baseLevel);
414 
415  // Second map out descendants hierarchy
416  std::map<const Event*, std::vector<const Event*>> descendantsMap;
417  PopulateDescendants(descendantsMap);
418 
419  std::function<void (const Event*, std::vector<const Event*>&)>
420  FindDescendantEvents = [&](const Event* eventPtr,
421  std::vector<const Event*>& sequence)
422  {
423  sequence.push_back(eventPtr);
424 
425  if (CalcLevel(eventPtr) > baseLevel+2) //We only care about levels as deep as workload executions.
426  {
427  return;
428  }
429 
430  auto children = descendantsMap.find(eventPtr);
431  if (children == descendantsMap.end())
432  {
433  return;
434  }
435 
436  for (const Event* child : children->second)
437  {
438  return FindDescendantEvents(child, sequence);
439  }
440  };
441 
442  // Third, find events belonging to each inference
443  int inferenceIdx = 0;
444  for (auto inference : inferences)
445  {
446  std::vector<const Event*> sequence;
447 
448  //build sequence, depth first
449  FindDescendantEvents(inference, sequence);
450 
451  outStream << "> Begin Inference: " << inferenceIdx << std::endl;
452  outStream << std::endl;
453  AnalyzeEventSequenceAndWriteResults(sequence.cbegin(),
454  sequence.cend(),
455  outStream);
456  outStream << std::endl;
457  outStream << "> End Inference: " << inferenceIdx << std::endl;
458 
459  inferenceIdx++;
460  }
461  }
462 }
463 
464 std::uint32_t Profiler::GetEventColor(const BackendId& backendId) const
465 {
466  static BackendId cpuRef("CpuRef");
467  static BackendId cpuAcc("CpuAcc");
468  static BackendId gpuAcc("GpuAcc");
469  if (backendId == cpuRef) {
470  // Cyan
471  return 0xffff001b;
472  } else if (backendId == cpuAcc) {
473  // Green
474  return 0x00ff001b;
475  } else if (backendId == gpuAcc) {
476  // Purple
477  return 0xff007f1b;
478  } else {
479  // Dark gray
480  return 0x5555551b;
481  }
482 }
483 
484 // The thread_local pointer to the profiler instance.
485 thread_local Profiler* tl_Profiler = nullptr;
486 
488 {
489  // Global reference to the single ProfileManager instance allowed.
490  static ProfilerManager s_ProfilerManager;
491  return s_ProfilerManager;
492 }
493 
495 {
496  tl_Profiler = profiler;
497 }
498 
500 {
501  return tl_Profiler;
502 }
503 
504 } // namespace armnn
JsonChildObject & GetChild(const unsigned int index)
Definition: JsonPrinter.hpp:41
void EndEvent(Event *event)
Definition: Profiling.cpp:218
Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Ev...
const Event * GetEventPtr(const Event *ptr)
Definition: Profiling.cpp:111
constexpr bool g_WriteReportToStdOutOnProfilerDestruction
Definition: Profiling.cpp:43
void AddChild(const JsonChildObject &childObject)
Definition: JsonPrinter.hpp:36
void RegisterProfiler(Profiler *profiler)
Definition: Profiling.cpp:494
int CalcLevel(const Event *eventPtr)
Definition: Profiling.cpp:235
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:487
Measurement FindMeasurement(const std::string &name, const Event *event)
Definition: Profiling.cpp:45
std::vector< Measurement > FindKernelMeasurements(const Event *event)
Definition: Profiling.cpp:64
void EnableProfiling(bool enableProfiling) override
Enables/disables profiling for this profiler.
Definition: Profiling.cpp:196
const std::string & GetName() const
Get the name of the event.
Copyright (c) 2020 ARM Limited.
void IgnoreUnused(Ts &&...)
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const override
Analyzes the tracked events and writes the results to the given output stream.
Definition: Profiling.cpp:381
DataLayout::NHWC false
size_t NumChildren() const
Definition: JsonPrinter.hpp:51
Profiler * GetProfiler()
Definition: Profiling.cpp:499
BackendId GetBackendId() const
Get the backend id of the event.
bool IsProfilingEnabled() override
Checks whether profiling is enabled.
Definition: Profiling.cpp:191
const std::vector< Measurement > GetMeasurements() const
Get the recorded measurements calculated between Start() and Stop()
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
Definition: Profiling.cpp:285
static const std::string WALL_CLOCK_TIME_STOP
void SetType(JsonObjectType type)
Definition: JsonPrinter.hpp:56
Event * BeginEvent(const BackendId &backendId, const std::string &name, std::vector< InstrumentPtr > &&instruments)
Definition: Profiling.cpp:201
static const std::string WALL_CLOCK_TIME_START
thread_local Profiler * tl_Profiler
Definition: Profiling.cpp:485
constexpr std::size_t g_ProfilingEventCountHint
Definition: Profiling.cpp:31
static const std::string WALL_CLOCK_TIME
void AddMeasurement(const double measurement)
Definition: JsonPrinter.hpp:31
void PrintJsonChildObject(const JsonChildObject &object, size_t &id)
Definition: JsonPrinter.cpp:15
constexpr bool g_WriteProfilingEventSequence
Definition: Profiling.cpp:34
constexpr bool g_AggregateProfilingEventsByInference
Definition: Profiling.cpp:39
const std::string & Get() const
Definition: BackendId.hpp:136
void SetUnit(const Measurement::Unit unit)
Definition: JsonPrinter.hpp:46
const Event * GetParentEvent() const
Get the pointer of the parent event.
uint32_t GetEventColor(const BackendId &backendId) const
Definition: Profiling.cpp:464
void Print(std::ostream &outStream) const override
Print stats for events in JSON Format to the given output stream.
Definition: Profiling.cpp:331