author     alered01 <Alex.Redshaw@arm.com>          2020-05-07 14:58:29 +0100
committer  Jan Eilers <jan.eilers@arm.com>          2020-05-22 14:42:37 +0100
commit     72b415674e7df9152a1e5d206b70e4d7a663f3fc (patch)
tree       bc5cf924de58f5c0a134bbe9304664d13b7f86a5
parent     f668f94de9a07d0bf488c10142a99c910f3b6640 (diff)
download   armnn-72b415674e7df9152a1e5d206b70e4d7a663f3fc.tar.gz
Adding more performance metrics
* Implemented CLTuning flow for ExecuteNetwork tests
* Added --tuning-path to specify tuning file to use/create
* Added --tuning-level to specify tuning level to use as well as enable extra tuning run to generate the tuning file
* Fixed issue where TuningLevel was being parsed incorrectly
* Added measurements for initialization, network parsing, network optimization, tuning, and shutdown
* Added flag to control number of iterations inference is run for

Signed-off-by: alered01 <Alex.Redshaw@arm.com>
Change-Id: Ic739ff26e136e32aff9f0995217c1c3207008ca4
-rw-r--r--  include/armnn/utility/Timer.hpp                           25
-rw-r--r--  src/armnn/Runtime.cpp                                      8
-rw-r--r--  src/backends/cl/ClBackendContext.cpp                      10
-rw-r--r--  tests/ExecuteNetwork/ExecuteNetwork.cpp                   47
-rw-r--r--  tests/InferenceModel.hpp                                  31
-rw-r--r--  tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp    154
6 files changed, 202 insertions, 73 deletions
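For context on the new tuning options: the patch enables CL tuning by attaching BackendOptions to the GpuAcc backend when the runtime is created. Below is a minimal sketch of that call pattern, mirroring the options block this change adds to ExecuteNetwork.cpp and RunCLTuning; the tuning file path and level shown here are illustrative only, and the include set is assumed.

    // Sketch: enable GpuAcc CL tuning via BackendOptions, following the
    // pattern used by this patch. Path and level values are placeholders.
    #include <armnn/ArmNN.hpp>
    #include <memory>

    int main()
    {
        armnn::IRuntime::CreationOptions options;
        options.m_BackendOptions.emplace_back(
            armnn::BackendOptions
            {
                "GpuAcc",
                {
                    {"TuningLevel", 2},                  // 0 = use existing file; 1/2/3 = Rapid/Normal/Exhaustive tuning run
                    {"TuningFile", "tuned_params.csv"},  // hypothetical path
                    {"KernelProfilingEnabled", false}
                }
            });

        std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options));
        // ... load an optimized network and run inference as usual ...
        return 0;
    }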
diff --git a/include/armnn/utility/Timer.hpp b/include/armnn/utility/Timer.hpp
new file mode 100644
index 0000000000..daf689e74f
--- /dev/null
+++ b/include/armnn/utility/Timer.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <chrono>
+#include <iomanip>
+
+namespace armnn
+{
+
+inline std::chrono::high_resolution_clock::time_point GetTimeNow()
+{
+ return std::chrono::high_resolution_clock::now();
+}
+
+inline std::chrono::duration<double, std::milli> GetTimeDuration(
+ std::chrono::high_resolution_clock::time_point start_time)
+{
+ return std::chrono::duration<double, std::milli>(GetTimeNow() - start_time);
+}
+
+}
\ No newline at end of file
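The helpers above are used throughout this patch to bracket a region of work and report its duration in milliseconds. A minimal usage sketch, following the pattern applied below in Runtime.cpp and InferenceModel.hpp (the work being timed is a placeholder):

    // Sketch: timing a block of work with the new Timer.hpp helpers.
    #include <armnn/utility/Timer.hpp>
    #include <iomanip>
    #include <iostream>

    int main()
    {
        const auto start_time = armnn::GetTimeNow();

        // ... work to be measured, e.g. creating a runtime or parsing a network ...

        std::cout << "Elapsed: " << std::setprecision(2) << std::fixed
                  << armnn::GetTimeDuration(start_time).count() << " ms\n";
        return 0;
    }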
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index dbdd409784..b1b7d51d7c 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -7,6 +7,7 @@
#include <armnn/Version.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
+#include <armnn/utility/Timer.hpp>
#include <armnn/backends/IBackendContext.hpp>
#include <backendsCommon/DynamicBackendUtils.hpp>
@@ -171,6 +172,7 @@ Runtime::Runtime(const CreationOptions& options)
: m_NetworkIdCounter(0),
m_ProfilingService(*this)
{
+ const auto start_time = armnn::GetTimeNow();
ARMNN_LOG(info) << "ArmNN v" << ARMNN_VERSION << "\n";
if ( options.m_ProfilingOptions.m_TimelineEnabled && !options.m_ProfilingOptions.m_EnableProfiling )
@@ -225,10 +227,14 @@ Runtime::Runtime(const CreationOptions& options)
m_ProfilingService.ConfigureProfilingService(options.m_ProfilingOptions);
m_DeviceSpec.AddSupportedBackends(supportedBackends);
+
+ ARMNN_LOG(info) << "Initialization time: " << std::setprecision(2)
+ << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";
}
Runtime::~Runtime()
{
+ const auto start_time = armnn::GetTimeNow();
std::vector<int> networkIDs;
try
{
@@ -272,6 +278,8 @@ Runtime::~Runtime()
m_BackendContexts.clear();
BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional());
+ ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2)
+ << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";
}
LoadedNetwork* Runtime::GetLoadedNetworkPtr(NetworkId networkId) const
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index bfe93bdc01..42f42b3023 100644
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -79,7 +79,7 @@ TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defau
{
if (value.IsInt())
{
- int v = value.IsInt();
+ int v = value.AsInt();
if (v > static_cast<int>(TuningLevel::Exhaustive) ||
v < static_cast<int>(TuningLevel::None))
{
@@ -218,18 +218,18 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
ConfigureTuner(*(m_Tuner.get()), tuningLevel);
- if (!m_TuningFile.empty())
+ if (!m_TuningFile.empty() && tuningLevel == TuningLevel::None)
{
try
{
m_Tuner->load_from_file(m_TuningFile.c_str());
- } catch (const std::exception& e)
+ }
+ catch (const std::exception& e)
{
ARMNN_LOG(warning) << "Could not load GpuAcc tuner data file.";
}
-
- tuner = m_Tuner.get();
}
+ tuner = m_Tuner.get();
}
m_ClContextControlWrapper = std::make_unique<ClContextControlWrapper>(
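For reference, the tuning levels bounded by ParseTuningLevel above correspond to the integer values exposed on the ExecuteNetwork command line. The sketch below shows the assumed mapping, inferred from the option help text added later in this patch; the exact enum definition lives in the ClBackendContext source.

    // Assumed mapping between --tuning-level values and the backend's
    // TuningLevel enum (0 bounded by TuningLevel::None, 3 by TuningLevel::Exhaustive).
    enum class TuningLevel
    {
        None       = 0,  // no tuning run; an existing tuning file is loaded if supplied
        Rapid      = 1,
        Normal     = 2,
        Exhaustive = 3
    };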
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 57b8692701..66d8e1378f 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -35,6 +35,10 @@ int main(int argc, const char* argv[])
uint32_t counterCapturePeriod;
std::string fileFormat;
+ size_t iterations = 1;
+ int tuningLevel = 0;
+ std::string tuningPath;
+
double thresholdTime = 0.0;
size_t subgraphId = 0;
@@ -121,6 +125,14 @@ int main(int argc, const char* argv[])
"If profiling is enabled in 'file-only' mode this is the capture period that will be used in the test")
("file-format", po::value(&fileFormat)->default_value("binary"),
"If profiling is enabled specifies the output file format")
+ ("iterations", po::value<size_t>(&iterations)->default_value(1),
+ "Number of iterations to run the network for, default is set to 1")
+ ("tuning-path", po::value(&tuningPath),
+ "Path to tuning file. Enables use of CL tuning")
+ ("tuning-level", po::value<int>(&tuningLevel)->default_value(0),
+ "Sets the tuning level which enables a tuning run which will update/create a tuning file. "
+ "Available options are: 1 (Rapid), 2 (Normal), 3 (Exhaustive). "
+ "Requires tuning-path to be set, default is set to 0 (No tuning run)")
("parse-unsupported", po::bool_switch()->default_value(false),
"Add unsupported operators as stand-in layers (where supported by parser)");
}
@@ -275,6 +287,33 @@ int main(int argc, const char* argv[])
// Remove duplicates from the list of compute devices.
RemoveDuplicateDevices(computeDevices);
+#if defined(ARMCOMPUTECL_ENABLED)
+ std::shared_ptr<armnn::IGpuAccTunedParameters> tuned_params;
+
+ if (tuningPath != "")
+ {
+ if (tuningLevel != 0)
+ {
+ RunCLTuning(tuningPath, tuningLevel, modelFormat, inputTensorShapes, computeDevices,
+ dynamicBackendsPath, modelPath, inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput,
+ outputTypes, outputNames, outputTensorFiles, dequantizeOutput, enableProfiling,
+ enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate, subgraphId,
+ enableLayerDetails, parseUnsupported);
+ }
+ ARMNN_LOG(info) << "Using tuning params: " << tuningPath << "\n";
+ options.m_BackendOptions.emplace_back(
+ armnn::BackendOptions
+ {
+ "GpuAcc",
+ {
+ {"TuningLevel", 0},
+ {"TuningFile", tuningPath.c_str()},
+ {"KernelProfilingEnabled", enableProfiling}
+ }
+ }
+ );
+ }
+#endif
try
{
CheckOptionDependencies(vm);
@@ -288,9 +327,9 @@ int main(int argc, const char* argv[])
// Create runtime
std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options));
- return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
- inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
- outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
- thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime);
+ return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath,
+ inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
+ outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+ thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, iterations, runtime);
}
}
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 410bc7c04e..781cef4ed0 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -6,6 +6,8 @@
#pragma once
#include <armnn/ArmNN.hpp>
+#include <armnn/Logging.hpp>
+#include <armnn/utility/Timer.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/utility/Assert.hpp>
@@ -31,7 +33,6 @@
#include <boost/variant.hpp>
#include <algorithm>
-#include <chrono>
#include <iterator>
#include <fstream>
#include <map>
@@ -399,8 +400,12 @@ public:
throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
}
+ const auto parsing_start_time = armnn::GetTimeNow();
armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);
+ ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
+ << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";
+
armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
{
ARMNN_SCOPED_HEAP_PROFILING("Optimizing");
@@ -410,7 +415,12 @@ public:
options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
options.m_Debug = params.m_PrintIntermediateLayers;
+ const auto optimization_start_time = armnn::GetTimeNow();
optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
+
+ ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
+ << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";
+
if (!optNet)
{
throw armnn::Exception("Optimize returned nullptr");
@@ -494,13 +504,13 @@ public:
}
// Start timer to record inference time in EnqueueWorkload (in milliseconds)
- const auto start_time = GetCurrentTime();
+ const auto start_time = armnn::GetTimeNow();
armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
MakeInputTensors(inputContainers),
MakeOutputTensors(outputContainers));
- const auto end_time = GetCurrentTime();
+ const auto duration = armnn::GetTimeDuration(start_time);
// if profiling is enabled print out the results
if (profiler && profiler->IsProfilingEnabled())
@@ -514,7 +524,7 @@ public:
}
else
{
- return std::chrono::duration<double, std::milli>(end_time - start_time);
+ return duration;
}
}
@@ -584,17 +594,4 @@ private:
{
return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
}
-
- std::chrono::high_resolution_clock::time_point GetCurrentTime()
- {
- return std::chrono::high_resolution_clock::now();
- }
-
- std::chrono::duration<double, std::milli> GetTimeDuration(
- std::chrono::high_resolution_clock::time_point& start_time,
- std::chrono::high_resolution_clock::time_point& end_time)
- {
- return std::chrono::duration<double, std::milli>(end_time - start_time);
- }
-
};
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index a922228689..ec0eaf90f8 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
@@ -4,6 +4,7 @@
//
#include <armnn/ArmNN.hpp>
#include <armnn/TypesUtils.hpp>
+#include <armnn/utility/Timer.hpp>
#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
@@ -378,7 +379,8 @@ struct ExecuteNetworkParams
template<typename TParser, typename TDataType>
int MainImpl(const ExecuteNetworkParams& params,
- const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
+ const std::shared_ptr<armnn::IRuntime>& runtime = nullptr,
+ size_t iterations = 1)
{
using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
@@ -473,44 +475,47 @@ int MainImpl(const ExecuteNetworkParams& params,
}
}
- // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
- auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
-
- if (params.m_GenerateTensorData)
+ for (size_t x = 0; x < iterations; x++)
{
- ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
- }
+ // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
+ auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
- // Print output tensors
- const auto& infosOut = model.GetOutputBindingInfos();
- for (size_t i = 0; i < numOutputs; i++)
- {
- const armnn::TensorInfo& infoOut = infosOut[i].second;
- auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
-
- TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
- infoOut,
- outputTensorFile,
- params.m_DequantizeOutput);
- boost::apply_visitor(printer, outputDataContainers[i]);
- }
+ if (params.m_GenerateTensorData)
+ {
+ ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
+ }
- ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
- << std::fixed << inference_duration.count() << " ms";
+ // Print output tensors
+ const auto& infosOut = model.GetOutputBindingInfos();
+ for (size_t i = 0; i < numOutputs; i++)
+ {
+ const armnn::TensorInfo& infoOut = infosOut[i].second;
+ auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
+
+ TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
+ infoOut,
+ outputTensorFile,
+ params.m_DequantizeOutput);
+ boost::apply_visitor(printer, outputDataContainers[i]);
+ }
- // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
- if (params.m_ThresholdTime != 0.0)
- {
- ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
- << std::fixed << params.m_ThresholdTime << " ms";
- auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
- ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
- << std::fixed << thresholdMinusInference << " ms" << "\n";
+ ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
+ << std::fixed << inference_duration.count() << " ms\n";
- if (thresholdMinusInference < 0)
+ // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+ if (params.m_ThresholdTime != 0.0)
{
- std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
- ARMNN_LOG(fatal) << errorMessage;
+ ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
+ << std::fixed << params.m_ThresholdTime << " ms";
+ auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
+ ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
+ << std::fixed << thresholdMinusInference << " ms" << "\n";
+
+ if (thresholdMinusInference < 0)
+ {
+ std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
+ ARMNN_LOG(fatal) << errorMessage;
+ }
}
}
}
@@ -545,6 +550,7 @@ int RunTest(const std::string& format,
const size_t subgraphId,
bool enableLayerDetails = false,
bool parseUnsupported = false,
+ const size_t iterations = 1,
const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
{
std::string modelFormat = armnn::stringUtils::StringTrimCopy(format);
@@ -682,34 +688,34 @@ int RunTest(const std::string& format,
if (modelFormat.find("armnn") != std::string::npos)
{
#if defined(ARMNN_SERIALIZER)
- return MainImpl<armnnDeserializer::IDeserializer, float>(params, runtime);
+ return MainImpl<armnnDeserializer::IDeserializer, float>(params, runtime, iterations);
#else
ARMNN_LOG(fatal) << "Not built with serialization support.";
- return EXIT_FAILURE;
+ return EXIT_FAILURE;
#endif
}
else if (modelFormat.find("caffe") != std::string::npos)
{
#if defined(ARMNN_CAFFE_PARSER)
- return MainImpl<armnnCaffeParser::ICaffeParser, float>(params, runtime);
+ return MainImpl<armnnCaffeParser::ICaffeParser, float>(params, runtime, iterations);
#else
ARMNN_LOG(fatal) << "Not built with Caffe parser support.";
return EXIT_FAILURE;
#endif
}
else if (modelFormat.find("onnx") != std::string::npos)
-{
+ {
#if defined(ARMNN_ONNX_PARSER)
- return MainImpl<armnnOnnxParser::IOnnxParser, float>(params, runtime);
+ return MainImpl<armnnOnnxParser::IOnnxParser, float>(params, runtime, iterations);
#else
ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
- return EXIT_FAILURE;
+ return EXIT_FAILURE;
#endif
}
else if (modelFormat.find("tensorflow") != std::string::npos)
{
#if defined(ARMNN_TF_PARSER)
- return MainImpl<armnnTfParser::ITfParser, float>(params, runtime);
+ return MainImpl<armnnTfParser::ITfParser, float>(params, runtime, iterations);
#else
ARMNN_LOG(fatal) << "Not built with Tensorflow parser support.";
return EXIT_FAILURE;
@@ -720,21 +726,21 @@ int RunTest(const std::string& format,
#if defined(ARMNN_TF_LITE_PARSER)
if (! isModelBinary)
{
- ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat << "'. Only 'binary' format supported \
- for tflite files";
+ ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
+ << "'. Only 'binary' format supported for tflite files";
return EXIT_FAILURE;
}
- return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime);
+ return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime, iterations);
#else
- ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat <<
- "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
+ ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
+ << "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
return EXIT_FAILURE;
#endif
}
else
{
- ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat <<
- "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
+ ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
+ << "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
return EXIT_FAILURE;
}
}
@@ -864,3 +870,57 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IR
dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnuspported);
}
+
+#if defined(ARMCOMPUTECL_ENABLED)
+int RunCLTuning(const std::string& tuningPath,
+ const int tuningLevel,
+ const std::string& modelFormat,
+ const std::string& inputTensorShapes,
+ const vector<armnn::BackendId>& computeDevices,
+ const std::string& dynamicBackendsPath,
+ const std::string& modelPath,
+ const std::string& inputNames,
+ const std::string& inputTensorDataFilePaths,
+ const std::string& inputTypes,
+ bool quantizeInput,
+ const std::string& outputTypes,
+ const std::string& outputNames,
+ const std::string& outputTensorFiles,
+ bool dequantizeOutput,
+ bool enableProfiling,
+ bool enableFp16TurboMode,
+ bool enableBf16TurboMode,
+ const double& thresholdTime,
+ bool printIntermediate,
+ const size_t subgraphId,
+ bool enableLayerDetails = false,
+ bool parseUnsupported = false)
+{
+ armnn::IRuntime::CreationOptions options;
+ options.m_BackendOptions.emplace_back(
+ armnn::BackendOptions
+ {
+ "GpuAcc",
+ {
+ {"TuningLevel", tuningLevel},
+ {"TuningFile", tuningPath.c_str()},
+ {"KernelProfilingEnabled", enableProfiling}
+ }
+ }
+ );
+
+ std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options));
+ const auto start_time = armnn::GetTimeNow();
+
+ ARMNN_LOG(info) << "Tuning run...\n";
+ int state = RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
+ inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
+ outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+ thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, 1, runtime);
+
+ ARMNN_LOG(info) << "Tuning time: " << std::setprecision(2)
+ << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";
+
+ return state;
+}
+#endif
\ No newline at end of file