ArmNN
 20.11
Profiling.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "Profiling.hpp"
6 
7 #include <armnn/BackendId.hpp>
10 
11 #include "JsonPrinter.hpp"
12 
13 #if ARMNN_STREAMLINE_ENABLED
14 #include <streamline_annotate.h>
15 #endif
16 
17 #include <algorithm>
18 #include <iomanip>
19 #include <iostream>
20 #include <fstream>
21 #include <map>
22 #include <stack>
23 
24 namespace armnn
25 {
26 
27 // Controls the amount of memory initially allocated to store profiling events.
28 // If chosen carefully, the profiling system will not make any additional allocations, thus minimizing its impact on
29 // measured times.
30 constexpr std::size_t g_ProfilingEventCountHint = 1024;
31 
32 // Whether profiling reports should include the sequence of events together with their timings.
33 constexpr bool g_WriteProfilingEventSequence = true;
34 
35 // Whether profiling reports should also report detailed information on events grouped by inference.
36 // This can spam the output stream, so use carefully (or adapt the code to just output information
37 // of interest).
39 
40 // Whether a call to Profiler::AnalyzeEventsAndWriteResults() will be made when the Profiler is destroyed.
41 // It can be convenient for local tests.
43 
44 Measurement FindMeasurement(const std::string& name, const Event* event)
45 {
46 
47  ARMNN_ASSERT(event != nullptr);
48 
49  // Search though the measurements.
50  for (const auto& measurement : event->GetMeasurements())
51  {
52  if (measurement.m_Name == name)
53  {
54  // Measurement found.
55  return measurement;
56  }
57  }
58 
59  // Measurement not found.
60  return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
61 }
62 
63 std::vector<Measurement> FindKernelMeasurements(const Event* event)
64 {
65  ARMNN_ASSERT(event != nullptr);
66 
67  std::vector<Measurement> measurements;
68 
69  // Search through the measurements.
70  for (const auto& measurement : event->GetMeasurements())
71  {
72  if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
73  || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
74  {
75  // Measurement found.
76  measurements.push_back(measurement);
77  }
78  }
79 
80  return measurements;
81 }
82 
83 std::map<std::string, Profiler::ProfilingEventStats> Profiler::CalculateProfilingEventStats() const
84 {
85  std::map<std::string, ProfilingEventStats> nameToStatsMap;
86 
87  for (const auto& event : m_EventSequence)
88  {
90 
91  double durationMs = measurement.m_Value;
92  auto it = nameToStatsMap.find(event->GetName());
93  if (it != nameToStatsMap.end())
94  {
95  ProfilingEventStats& stats = it->second;
96  stats.m_TotalMs += durationMs;
97  stats.m_MinMs = std::min(stats.m_MinMs, durationMs);
98  stats.m_MaxMs = std::max(stats.m_MaxMs, durationMs);
99  ++stats.m_Count;
100  }
101  else
102  {
103  nameToStatsMap.emplace(event->GetName(), ProfilingEventStats{ durationMs, durationMs, durationMs, 1 });
104  }
105  }
106 
107  return nameToStatsMap;
108 }
109 
110 const Event* GetEventPtr(const Event* ptr) { return ptr;}
111 const Event* GetEventPtr(const std::unique_ptr<Event>& ptr) {return ptr.get(); }
112 
113 template<typename ItertType>
114 void Profiler::AnalyzeEventSequenceAndWriteResults(ItertType first, ItertType last, std::ostream& outStream) const
115 {
116  // Outputs event sequence, if needed.
117  if (g_WriteProfilingEventSequence)
118  {
119  // Makes sure timestamps are output with 6 decimals, and save old settings.
120  std::streamsize oldPrecision = outStream.precision();
121  outStream.precision(6);
122  std::ios_base::fmtflags oldFlags = outStream.flags();
123  outStream.setf(std::ios::fixed);
124  // Outputs fields.
125  outStream << "Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl;
126  for (auto event = first; event != last; ++event)
127  {
128  const Event* eventPtr = GetEventPtr((*event));
129  double startTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_START, eventPtr).m_Value;
130  double stopTimeMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME_STOP, eventPtr).m_Value;
131 
132  // Find the WallClock measurement if there is one.
133  double durationMs = FindMeasurement(WallClockTimer::WALL_CLOCK_TIME, eventPtr).m_Value;
134  outStream << std::setw(50) << eventPtr->GetName() << " "
135  << std::setw(20) << durationMs
136  << std::setw(20) << startTimeMs
137  << std::setw(20) << stopTimeMs
138  << std::setw(20) << eventPtr->GetBackendId().Get()
139  << std::endl;
140  }
141  outStream << std::endl;
142  // Restores previous precision settings.
143  outStream.flags(oldFlags);
144  outStream.precision(oldPrecision);
145  }
146 
147  // Aggregates results per event name.
148  std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats();
149 
150  // Outputs aggregated stats.
151  outStream << "Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl;
152  for (const auto& pair : nameToStatsMap)
153  {
154  const std::string& eventLabel = pair.first;
155  const ProfilingEventStats& eventStats = pair.second;
156  const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count);
157 
158  outStream << "\t" << std::setw(50) << eventLabel << " " << std::setw(9) << avgMs << " "
159  << std::setw(9) << eventStats.m_MinMs << " " << std::setw(9) << eventStats.m_MaxMs << " "
160  << std::setw(9) << eventStats.m_TotalMs << " " << std::setw(9) << eventStats.m_Count << std::endl;
161  }
162  outStream << std::endl;
163 }
164 
166  : m_ProfilingEnabled(false)
167 {
168  m_EventSequence.reserve(g_ProfilingEventCountHint);
169 
170 #if ARMNN_STREAMLINE_ENABLED
171  // Initialises streamline annotations.
172  ANNOTATE_SETUP;
173 #endif
174 }
175 
177 {
178  if (m_ProfilingEnabled)
179  {
180  if (g_WriteReportToStdOutOnProfilerDestruction)
181  {
182  Print(std::cout);
183  }
184  }
185 
186  // Un-register this profiler from the current thread.
188 }
189 
191 {
192  return m_ProfilingEnabled;
193 }
194 
195 void Profiler::EnableProfiling(bool enableProfiling)
196 {
197  m_ProfilingEnabled = enableProfiling;
198 }
199 
201  const std::string& label,
202  std::vector<InstrumentPtr>&& instruments)
203 {
204  Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
205  m_EventSequence.push_back(std::make_unique<Event>(label, this, parent, backendId, std::move(instruments)));
206  Event* event = m_EventSequence.back().get();
207  event->Start();
208 
209 #if ARMNN_STREAMLINE_ENABLED
210  ANNOTATE_CHANNEL_COLOR(uint32_t(m_Parents.size()), GetEventColor(backendId), label.c_str());
211 #endif
212 
213  m_Parents.push(event);
214  return event;
215 }
216 
218 {
219  event->Stop();
220 
221  ARMNN_ASSERT(!m_Parents.empty());
222  ARMNN_ASSERT(event == m_Parents.top());
223  m_Parents.pop();
224 
225  Event* parent = m_Parents.empty() ? nullptr : m_Parents.top();
226  IgnoreUnused(parent);
227  ARMNN_ASSERT(event->GetParentEvent() == parent);
228 
229 #if ARMNN_STREAMLINE_ENABLED
230  ANNOTATE_CHANNEL_END(uint32_t(m_Parents.size()));
231 #endif
232 }
233 
234 int CalcLevel(const Event* eventPtr)
235 {
236  int level=0;
237  while (eventPtr != nullptr)
238  {
239  eventPtr = eventPtr->GetParentEvent();
240  level++;
241  }
242  return level;
243 }
244 
245 void Profiler::PopulateInferences(std::vector<const Event*>& outInferences, int& outBaseLevel) const
246 {
247  outInferences.reserve(m_EventSequence.size());
248  for (const auto& event : m_EventSequence)
249  {
250  const Event* eventPtrRaw = event.get();
251  if (eventPtrRaw->GetName() == "EnqueueWorkload")
252  {
253  outBaseLevel = (outBaseLevel == -1) ? CalcLevel(eventPtrRaw) : outBaseLevel;
254  outInferences.push_back(eventPtrRaw);
255  }
256  }
257 }
258 
259 void Profiler::PopulateDescendants(std::map<const Event*, std::vector<const Event*>>& outDescendantsMap) const
260 {
261  for (const auto& event : m_EventSequence)
262  {
263  const Event* eventPtrRaw = event.get();
264  const Event* parent = eventPtrRaw->GetParentEvent();
265 
266  if (!parent)
267  {
268  continue;
269  }
270 
271  auto it = outDescendantsMap.find(parent);
272  if (it == outDescendantsMap.end())
273  {
274  outDescendantsMap.emplace(parent, std::vector<const Event*>({eventPtrRaw}));
275  }
276  else
277  {
278  it->second.push_back(eventPtrRaw);
279  }
280  }
281 }
282 
283 
284 void ExtractJsonObjects(unsigned int inferenceIndex,
285  const Event* parentEvent,
286  JsonChildObject& parentObject,
287  std::map<const Event*, std::vector<const Event*>> descendantsMap)
288 {
289  ARMNN_ASSERT(parentEvent);
290  std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
291  unsigned int childIdx=0;
292  for(size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
293  {
294  if (inferenceIndex == 0)
295  {
296  // Only add kernel measurement once, in case of multiple inferences
297  JsonChildObject measurementObject{instrumentMeasurements[measurementIndex].m_Name};
298  measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
299  measurementObject.SetType(JsonObjectType::Measurement);
300 
301  ARMNN_ASSERT(parentObject.NumChildren() == childIdx);
302  parentObject.AddChild(measurementObject);
303  }
304 
305  parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
306  }
307 
308 
309  auto childEventsIt = descendantsMap.find(parentEvent);
310  if (childEventsIt != descendantsMap.end())
311  {
312  for (auto childEvent : childEventsIt->second)
313  {
314  if (inferenceIndex == 0)
315  {
316  // Only add second level once, in case of multiple inferences
317  JsonChildObject childObject{childEvent->GetName()};
318  childObject.SetType(JsonObjectType::Event);
319  parentObject.AddChild(childObject);
320  }
321 
322  // Recursively process children. In reality this won't be very deep recursion. ~4-6 levels deep.
323  ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
324 
325  childIdx++;
326  }
327  }
328 }
329 
330 void Profiler::Print(std::ostream& outStream) const
331 {
332  // Makes sure timestamps are output with 6 decimals, and save old settings.
333  std::streamsize oldPrecision = outStream.precision();
334  outStream.precision(6);
335  std::ios_base::fmtflags oldFlags = outStream.flags();
336  outStream.setf(std::ios::fixed);
337  JsonPrinter printer(outStream);
338 
339  // First find all the "inference" Events and print out duration measurements.
340  int baseLevel = -1;
341  std::vector<const Event*> inferences;
342  PopulateInferences(inferences, baseLevel);
343 
344  // Second map out descendants hierarchy
345  std::map<const Event*, std::vector<const Event*>> descendantsMap;
346  PopulateDescendants(descendantsMap);
347 
348  JsonChildObject inferenceObject{"inference_measurements"};
349  JsonChildObject layerObject{"layer_measurements"};
350  std::vector<JsonChildObject> workloadObjects;
351  std::map<unsigned int, std::vector<JsonChildObject>> workloadToKernelObjects;
352 
353  for (unsigned int inferenceIndex = 0; inferenceIndex < inferences.size(); ++inferenceIndex)
354  {
355  auto inference = inferences[inferenceIndex];
356  ExtractJsonObjects(inferenceIndex, inference, inferenceObject, descendantsMap);
357  }
358 
359  printer.PrintHeader();
360  printer.PrintArmNNHeader();
361 
362  // print inference object, also prints child layer and kernel measurements
363  size_t id=0;
364  printer.PrintJsonChildObject(inferenceObject, id);
365 
366  // end of ArmNN
367  printer.PrintNewLine();
368  printer.PrintFooter();
369 
370  // end of main JSON object
371  printer.PrintNewLine();
372  printer.PrintFooter();
373  printer.PrintNewLine();
374 
375  // Restores previous precision settings.
376  outStream.flags(oldFlags);
377  outStream.precision(oldPrecision);
378 }
379 
380 void Profiler::AnalyzeEventsAndWriteResults(std::ostream& outStream) const
381 {
382  // Stack should be empty now.
383  const bool saneMarkerSequence = m_Parents.empty();
384 
385  // Abort if the sequence of markers was found to have incorrect information:
386  // The stats cannot be trusted.
387  if (!saneMarkerSequence)
388  {
389  outStream << "Cannot write profiling stats. "
390  "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly "
391  "wrong stats. The profiling system may contain implementation issues or could have been used in an "
392  "unsafe manner." << std::endl;
393  return;
394  }
395 
396  // Analyzes the full sequence of events.
397  AnalyzeEventSequenceAndWriteResults(m_EventSequence.cbegin(),
398  m_EventSequence.cend(),
399  outStream);
400 
401  // Aggregates events by tag if requested (spams the output stream if done for all tags).
402  if (g_AggregateProfilingEventsByInference)
403  {
404  outStream << std::endl;
405  outStream << "***" << std::endl;
406  outStream << "*** Per Inference Stats" << std::endl;
407  outStream << "***" << std::endl;
408  outStream << std::endl;
409 
410  int baseLevel = -1;
411  std::vector<const Event*> inferences;
412  PopulateInferences(inferences, baseLevel);
413 
414  // Second map out descendants hierarchy
415  std::map<const Event*, std::vector<const Event*>> descendantsMap;
416  PopulateDescendants(descendantsMap);
417 
418  std::function<void (const Event*, std::vector<const Event*>&)>
419  FindDescendantEvents = [&](const Event* eventPtr,
420  std::vector<const Event*>& sequence)
421  {
422  sequence.push_back(eventPtr);
423 
424  if (CalcLevel(eventPtr) > baseLevel+2) //We only care about levels as deep as workload executions.
425  {
426  return;
427  }
428 
429  auto children = descendantsMap.find(eventPtr);
430  if (children == descendantsMap.end())
431  {
432  return;
433  }
434 
435  for (const Event* child : children->second)
436  {
437  return FindDescendantEvents(child, sequence);
438  }
439  };
440 
441  // Third, find events belonging to each inference
442  int inferenceIdx = 0;
443  for (auto inference : inferences)
444  {
445  std::vector<const Event*> sequence;
446 
447  //build sequence, depth first
448  FindDescendantEvents(inference, sequence);
449 
450  outStream << "> Begin Inference: " << inferenceIdx << std::endl;
451  outStream << std::endl;
452  AnalyzeEventSequenceAndWriteResults(sequence.cbegin(),
453  sequence.cend(),
454  outStream);
455  outStream << std::endl;
456  outStream << "> End Inference: " << inferenceIdx << std::endl;
457 
458  inferenceIdx++;
459  }
460  }
461 }
462 
463 std::uint32_t Profiler::GetEventColor(const BackendId& backendId) const
464 {
465  static BackendId cpuRef("CpuRef");
466  static BackendId cpuAcc("CpuAcc");
467  static BackendId gpuAcc("GpuAcc");
468  if (backendId == cpuRef) {
469  // Cyan
470  return 0xffff001b;
471  } else if (backendId == cpuAcc) {
472  // Green
473  return 0x00ff001b;
474  } else if (backendId == gpuAcc) {
475  // Purple
476  return 0xff007f1b;
477  } else {
478  // Dark gray
479  return 0x5555551b;
480  }
481 }
482 
483 // The thread_local pointer to the profiler instance.
484 thread_local Profiler* tl_Profiler = nullptr;
485 
487 {
488  // Global reference to the single ProfileManager instance allowed.
489  static ProfilerManager s_ProfilerManager;
490  return s_ProfilerManager;
491 }
492 
494 {
495  tl_Profiler = profiler;
496 }
497 
499 {
500  return tl_Profiler;
501 }
502 
503 } // namespace armnn
JsonChildObject & GetChild(const unsigned int index)
Definition: JsonPrinter.hpp:41
void EndEvent(Event *event)
Definition: Profiling.cpp:217
Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Ev...
const Event * GetEventPtr(const Event *ptr)
Definition: Profiling.cpp:110
constexpr bool g_WriteReportToStdOutOnProfilerDestruction
Definition: Profiling.cpp:42
void AddChild(const JsonChildObject &childObject)
Definition: JsonPrinter.hpp:36
void RegisterProfiler(Profiler *profiler)
Definition: Profiling.cpp:493
int CalcLevel(const Event *eventPtr)
Definition: Profiling.cpp:234
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:486
Measurement FindMeasurement(const std::string &name, const Event *event)
Definition: Profiling.cpp:44
DataLayout::NCHW false
std::vector< Measurement > FindKernelMeasurements(const Event *event)
Definition: Profiling.cpp:63
void EnableProfiling(bool enableProfiling) override
Enables/disables profiling for this profiler.
Definition: Profiling.cpp:195
const std::string & GetName() const
Get the name of the event.
Copyright (c) 2020 ARM Limited.
void IgnoreUnused(Ts &&...)
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const override
Analyzes the tracked events and writes the results to the given output stream.
Definition: Profiling.cpp:380
size_t NumChildren() const
Definition: JsonPrinter.hpp:51
Profiler * GetProfiler()
Definition: Profiling.cpp:498
BackendId GetBackendId() const
Get the backend id of the event.
bool IsProfilingEnabled() override
Checks whether profiling is enabled.
Definition: Profiling.cpp:190
const std::vector< Measurement > GetMeasurements() const
Get the recorded measurements calculated between Start() and Stop()
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
Definition: Profiling.cpp:284
static const std::string WALL_CLOCK_TIME_STOP
void SetType(JsonObjectType type)
Definition: JsonPrinter.hpp:56
Event * BeginEvent(const BackendId &backendId, const std::string &name, std::vector< InstrumentPtr > &&instruments)
Definition: Profiling.cpp:200
static const std::string WALL_CLOCK_TIME_START
thread_local Profiler * tl_Profiler
Definition: Profiling.cpp:484
constexpr std::size_t g_ProfilingEventCountHint
Definition: Profiling.cpp:30
static const std::string WALL_CLOCK_TIME
void AddMeasurement(const double measurement)
Definition: JsonPrinter.hpp:31
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void PrintJsonChildObject(const JsonChildObject &object, size_t &id)
Definition: JsonPrinter.cpp:15
constexpr bool g_WriteProfilingEventSequence
Definition: Profiling.cpp:33
constexpr bool g_AggregateProfilingEventsByInference
Definition: Profiling.cpp:38
const std::string & Get() const
Definition: BackendId.hpp:136
void SetUnit(const Measurement::Unit unit)
Definition: JsonPrinter.hpp:46
const Event * GetParentEvent() const
Get the pointer of the parent event.
uint32_t GetEventColor(const BackendId &backendId) const
Definition: Profiling.cpp:463
void Print(std::ostream &outStream) const override
Print stats for events in JSON Format to the given output stream.
Definition: Profiling.cpp:330