about summary refs log tree commit diff
diff options
context:
space:
mode:
author Derek Lamberti <derek.lamberti@arm.com> 2018-10-01 09:28:57 +0100
committer Matthew Bentham <matthew.bentham@arm.com> 2018-10-10 16:16:57 +0100
commit e4ba53a85c559d4fe574305276ac815cf7995762 (patch)
tree e3f8a3e1b54b2a9cf1e21909f5e4ad9e285be49d
parent fcb382af87e79ae4bdc6a604241efbf2533e1737 (diff)
download armnn-e4ba53a85c559d4fe574305276ac815cf7995762.tar.gz
IVGCVSW-1824 Fix slow profiling of neon. (~50% reduced end-to-end time)
Change-Id: I58295c298934317a2b365887bd9f9f6705cd0a21
-rw-r--r-- src/armnn/NeonInterceptorScheduler.cpp 31
-rw-r--r-- src/armnn/NeonInterceptorScheduler.hpp 7
-rw-r--r-- src/armnn/NeonTimer.cpp 11
3 files changed, 23 insertions, 26 deletions
diff --git a/src/armnn/NeonInterceptorScheduler.cpp b/src/armnn/NeonInterceptorScheduler.cpp
index 8363def68e..a5ca315345 100644
--- a/src/armnn/NeonInterceptorScheduler.cpp
+++ b/src/armnn/NeonInterceptorScheduler.cpp
@@ -9,9 +9,8 @@
namespace armnn{
-NeonInterceptorScheduler::NeonInterceptorScheduler(NeonTimer::KernelMeasurements& kernels,
- arm_compute::IScheduler &realScheduler)
- : m_Kernels(kernels), m_RealScheduler(realScheduler)
+NeonInterceptorScheduler::NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler)
+ : m_RealScheduler(realScheduler)
{
}
@@ -27,32 +26,22 @@ unsigned int NeonInterceptorScheduler::num_threads() const
void NeonInterceptorScheduler::schedule(arm_compute::ICPPKernel* kernel, const Hints& hints)
{
- m_Timer.Start();
+ WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now();
m_RealScheduler.schedule(kernel, hints.split_dimension());
- m_Timer.Stop();
+ WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now();
- std::vector<Measurement> measurements = m_Timer.GetMeasurements();
- BOOST_ASSERT(!measurements.empty());
-
- Measurement measurement(measurements.front()); // NOTE: 1st measurement is delta
- measurement.m_Name = kernel->name();
- m_Kernels.push_back(std::move(measurement));
+ const auto delta = std::chrono::duration<double, std::micro>(stopTime - startTime);
+ m_Kernels->emplace_back(kernel->name(), delta.count(), Measurement::Unit::TIME_US);
}
void NeonInterceptorScheduler::run_workloads(std::vector <Workload>& workloads)
{
- m_Timer.Start();
- // NOTE: we should think about utilising the tag to make profiling more understandable
+ WallClockTimer::clock::time_point startTime = WallClockTimer::clock::now();
m_RealScheduler.run_tagged_workloads(workloads, nullptr);
- m_Timer.Stop();
-
- std::vector<Measurement> measurements = m_Timer.GetMeasurements();
- BOOST_ASSERT_MSG(measurements.size() == 3, "WallClockTimer does not have correct amount of measurements.");
+ WallClockTimer::clock::time_point stopTime = WallClockTimer::clock::now();
- // WallClockTimer has 3 measurements, duration always being the first.
- Measurement measurement(measurements.front());
- measurement.m_Name = "Workload";
- m_Kernels.push_back(std::move(measurement));
+ const auto delta = std::chrono::duration<double, std::micro>(stopTime - startTime);
+ m_Kernels->emplace_back(std::string("Workload"), delta.count(), Measurement::Unit::TIME_US);
}
} // namespace armnn \ No newline at end of file
diff --git a/src/armnn/NeonInterceptorScheduler.hpp b/src/armnn/NeonInterceptorScheduler.hpp
index 37966b8178..f33b79a2da 100644
--- a/src/armnn/NeonInterceptorScheduler.hpp
+++ b/src/armnn/NeonInterceptorScheduler.hpp
@@ -17,7 +17,7 @@ namespace armnn
class NeonInterceptorScheduler : public arm_compute::IScheduler
{
public:
- NeonInterceptorScheduler(NeonTimer::KernelMeasurements &kernels, arm_compute::IScheduler &realScheduler);
+ NeonInterceptorScheduler(arm_compute::IScheduler &realScheduler);
~NeonInterceptorScheduler() = default;
void set_num_threads(unsigned int numThreads) override;
@@ -28,10 +28,11 @@ public:
void run_workloads(std::vector<Workload> &workloads) override;
+ void SetKernels(NeonTimer::KernelMeasurements* kernels) { m_Kernels = kernels; }
+ NeonTimer::KernelMeasurements* GetKernels() { return m_Kernels; }
private:
- NeonTimer::KernelMeasurements& m_Kernels;
+ NeonTimer::KernelMeasurements* m_Kernels;
arm_compute::IScheduler& m_RealScheduler;
- WallClockTimer m_Timer;
};
} // namespace armnn
diff --git a/src/armnn/NeonTimer.cpp b/src/armnn/NeonTimer.cpp
index 1ee0c64103..219edc9680 100644
--- a/src/armnn/NeonTimer.cpp
+++ b/src/armnn/NeonTimer.cpp
@@ -13,24 +13,31 @@
namespace armnn
{
+namespace
+{
+static thread_local auto g_Interceptor = std::make_shared<NeonInterceptorScheduler>(arm_compute::Scheduler::get());
+}
void NeonTimer::Start()
{
m_Kernels.clear();
+ BOOST_ASSERT(g_Interceptor->GetKernels() == nullptr);
+ g_Interceptor->SetKernels(&m_Kernels);
+
m_RealSchedulerType = arm_compute::Scheduler::get_type();
//Note: We can't currently replace a custom scheduler
if(m_RealSchedulerType != arm_compute::Scheduler::Type::CUSTOM)
{
// Keep the real schedule and add NeonInterceptorScheduler as an interceptor
m_RealScheduler = &arm_compute::Scheduler::get();
- auto interceptor = std::make_shared<NeonInterceptorScheduler>(m_Kernels, *m_RealScheduler);
- arm_compute::Scheduler::set(std::static_pointer_cast<arm_compute::IScheduler>(interceptor));
+ arm_compute::Scheduler::set(std::static_pointer_cast<arm_compute::IScheduler>(g_Interceptor));
}
}
void NeonTimer::Stop()
{
// Restore real scheduler
+ g_Interceptor->SetKernels(nullptr);
arm_compute::Scheduler::set(m_RealSchedulerType);
m_RealScheduler = nullptr;
}