diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/ExecuteNetwork/ExecuteNetwork.cpp | 47 | ||||
-rw-r--r-- | tests/InferenceModel.hpp | 31 | ||||
-rw-r--r-- | tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp | 154 |
3 files changed, 164 insertions, 68 deletions
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index 57b8692701..66d8e1378f 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -35,6 +35,10 @@ int main(int argc, const char* argv[]) uint32_t counterCapturePeriod; std::string fileFormat; + size_t iterations = 1; + int tuningLevel = 0; + std::string tuningPath; + double thresholdTime = 0.0; size_t subgraphId = 0; @@ -121,6 +125,14 @@ int main(int argc, const char* argv[]) "If profiling is enabled in 'file-only' mode this is the capture period that will be used in the test") ("file-format", po::value(&fileFormat)->default_value("binary"), "If profiling is enabled specifies the output file format") + ("iterations", po::value<size_t>(&iterations)->default_value(1), + "Number of iterations to run the network for, default is set to 1") + ("tuning-path", po::value(&tuningPath), + "Path to tuning file. Enables use of CL tuning") + ("tuning-level", po::value<int>(&tuningLevel)->default_value(0), + "Sets the tuning level which enables a tuning run which will update/create a tuning file. " + "Available options are: 1 (Rapid), 2 (Normal), 3 (Exhaustive). " + "Requires tuning-path to be set, default is set to 0 (No tuning run)") ("parse-unsupported", po::bool_switch()->default_value(false), "Add unsupported operators as stand-in layers (where supported by parser)"); } @@ -275,6 +287,33 @@ int main(int argc, const char* argv[]) // Remove duplicates from the list of compute devices. RemoveDuplicateDevices(computeDevices); +#if defined(ARMCOMPUTECL_ENABLED) + std::shared_ptr<armnn::IGpuAccTunedParameters> tuned_params; + + if (tuningPath != "") + { + if (tuningLevel != 0) + { + RunCLTuning(tuningPath, tuningLevel, modelFormat, inputTensorShapes, computeDevices, + dynamicBackendsPath, modelPath, inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput, + outputTypes, outputNames, outputTensorFiles, dequantizeOutput, enableProfiling, + enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate, subgraphId, + enableLayerDetails, parseUnsupported); + } + ARMNN_LOG(info) << "Using tuning params: " << tuningPath << "\n"; + options.m_BackendOptions.emplace_back( + armnn::BackendOptions + { + "GpuAcc", + { + {"TuningLevel", 0}, + {"TuningFile", tuningPath.c_str()}, + {"KernelProfilingEnabled", enableProfiling} + } + } + ); + } +#endif try { CheckOptionDependencies(vm); @@ -288,9 +327,9 @@ int main(int argc, const char* argv[]) // Create runtime std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options)); - return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames, - inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames, - outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode, - thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime); + return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, + inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames, + outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode, + thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, iterations, runtime); } } diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp index 410bc7c04e..781cef4ed0 100644 --- a/tests/InferenceModel.hpp +++ b/tests/InferenceModel.hpp @@ -6,6 +6,8 @@ #pragma once #include <armnn/ArmNN.hpp> +#include <armnn/Logging.hpp> +#include <armnn/utility/Timer.hpp> #include <armnn/BackendRegistry.hpp> #include <armnn/utility/Assert.hpp> @@ -31,7 +33,6 @@ #include <boost/variant.hpp> #include <algorithm> -#include <chrono> #include <iterator> #include <fstream> #include <map> @@ -399,8 +400,12 @@ public: throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends); } + const auto parsing_start_time = armnn::GetTimeNow(); armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings); + ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n"; + armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}}; { ARMNN_SCOPED_HEAP_PROFILING("Optimizing"); @@ -410,7 +415,12 @@ public: options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode; options.m_Debug = params.m_PrintIntermediateLayers; + const auto optimization_start_time = armnn::GetTimeNow(); optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options); + + ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n"; + if (!optNet) { throw armnn::Exception("Optimize returned nullptr"); @@ -494,13 +504,13 @@ public: } // Start timer to record inference time in EnqueueWorkload (in milliseconds) - const auto start_time = GetCurrentTime(); + const auto start_time = armnn::GetTimeNow(); armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier, MakeInputTensors(inputContainers), MakeOutputTensors(outputContainers)); - const auto end_time = GetCurrentTime(); + const auto duration = armnn::GetTimeDuration(start_time); // if profiling is enabled print out the results if (profiler && profiler->IsProfilingEnabled()) @@ -514,7 +524,7 @@ public: } else { - return std::chrono::duration<double, std::milli>(end_time - start_time); + return duration; } } @@ -584,17 +594,4 @@ private: { return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers); } - - std::chrono::high_resolution_clock::time_point GetCurrentTime() - { - return std::chrono::high_resolution_clock::now(); - } - - std::chrono::duration<double, std::milli> GetTimeDuration( - std::chrono::high_resolution_clock::time_point& start_time, - std::chrono::high_resolution_clock::time_point& end_time) - { - return std::chrono::duration<double, std::milli>(end_time - start_time); - } - }; diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp index a922228689..ec0eaf90f8 100644 --- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp +++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp @@ -4,6 +4,7 @@ // #include <armnn/ArmNN.hpp> #include <armnn/TypesUtils.hpp> +#include <armnn/utility/Timer.hpp> #if defined(ARMNN_SERIALIZER) #include "armnnDeserializer/IDeserializer.hpp" @@ -378,7 +379,8 @@ struct ExecuteNetworkParams template<typename TParser, typename TDataType> int MainImpl(const ExecuteNetworkParams& params, - const std::shared_ptr<armnn::IRuntime>& runtime = nullptr) + const std::shared_ptr<armnn::IRuntime>& runtime = nullptr, + size_t iterations = 1) { using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>; @@ -473,44 +475,47 @@ int MainImpl(const ExecuteNetworkParams& params, } } - // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(inputDataContainers, outputDataContainers); - - if (params.m_GenerateTensorData) + for (size_t x = 0; x < iterations; x++) { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } + // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) + auto inference_duration = model.Run(inputDataContainers, outputDataContainers); - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput); - boost::apply_visitor(printer, outputDataContainers[i]); - } + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference_duration.count() << " ms"; + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo& infoOut = infosOut[i].second; + auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput); + boost::apply_visitor(printer, outputDataContainers[i]); + } - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inference_duration.count() << " ms\n"; - if (thresholdMinusInference < 0) + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + } } } } @@ -545,6 +550,7 @@ int RunTest(const std::string& format, const size_t subgraphId, bool enableLayerDetails = false, bool parseUnsupported = false, + const size_t iterations = 1, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr) { std::string modelFormat = armnn::stringUtils::StringTrimCopy(format); @@ -682,34 +688,34 @@ int RunTest(const std::string& format, if (modelFormat.find("armnn") != std::string::npos) { #if defined(ARMNN_SERIALIZER) - return MainImpl<armnnDeserializer::IDeserializer, float>(params, runtime); + return MainImpl<armnnDeserializer::IDeserializer, float>(params, runtime, iterations); #else ARMNN_LOG(fatal) << "Not built with serialization support."; - return EXIT_FAILURE; + return EXIT_FAILURE; #endif } else if (modelFormat.find("caffe") != std::string::npos) { #if defined(ARMNN_CAFFE_PARSER) - return MainImpl<armnnCaffeParser::ICaffeParser, float>(params, runtime); + return MainImpl<armnnCaffeParser::ICaffeParser, float>(params, runtime, iterations); #else ARMNN_LOG(fatal) << "Not built with Caffe parser support."; return EXIT_FAILURE; #endif } else if (modelFormat.find("onnx") != std::string::npos) -{ + { #if defined(ARMNN_ONNX_PARSER) - return MainImpl<armnnOnnxParser::IOnnxParser, float>(params, runtime); + return MainImpl<armnnOnnxParser::IOnnxParser, float>(params, runtime, iterations); #else ARMNN_LOG(fatal) << "Not built with Onnx parser support."; - return EXIT_FAILURE; + return EXIT_FAILURE; #endif } else if (modelFormat.find("tensorflow") != std::string::npos) { #if defined(ARMNN_TF_PARSER) - return MainImpl<armnnTfParser::ITfParser, float>(params, runtime); + return MainImpl<armnnTfParser::ITfParser, float>(params, runtime, iterations); #else ARMNN_LOG(fatal) << "Not built with Tensorflow parser support."; return EXIT_FAILURE; @@ -720,21 +726,21 @@ int RunTest(const std::string& format, #if defined(ARMNN_TF_LITE_PARSER) if (! isModelBinary) { - ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat << "'. Only 'binary' format supported \ - for tflite files"; + ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat + << "'. Only 'binary' format supported for tflite files"; return EXIT_FAILURE; } - return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime); + return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime, iterations); #else - ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat << - "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'"; + ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat + << "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'"; return EXIT_FAILURE; #endif } else { - ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat << - "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'"; + ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat + << "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'"; return EXIT_FAILURE; } } @@ -864,3 +870,57 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IR dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnuspported); } + +#if defined(ARMCOMPUTECL_ENABLED) +int RunCLTuning(const std::string& tuningPath, + const int tuningLevel, + const std::string& modelFormat, + const std::string& inputTensorShapes, + const vector<armnn::BackendId>& computeDevices, + const std::string& dynamicBackendsPath, + const std::string& modelPath, + const std::string& inputNames, + const std::string& inputTensorDataFilePaths, + const std::string& inputTypes, + bool quantizeInput, + const std::string& outputTypes, + const std::string& outputNames, + const std::string& outputTensorFiles, + bool dequantizeOutput, + bool enableProfiling, + bool enableFp16TurboMode, + bool enableBf16TurboMode, + const double& thresholdTime, + bool printIntermediate, + const size_t subgraphId, + bool enableLayerDetails = false, + bool parseUnsupported = false) +{ + armnn::IRuntime::CreationOptions options; + options.m_BackendOptions.emplace_back( + armnn::BackendOptions + { + "GpuAcc", + { + {"TuningLevel", tuningLevel}, + {"TuningFile", tuningPath.c_str()}, + {"KernelProfilingEnabled", enableProfiling} + } + } + ); + + std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options)); + const auto start_time = armnn::GetTimeNow(); + + ARMNN_LOG(info) << "Tuning run...\n"; + int state = RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames, + inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames, + outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode, + thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, 1, runtime); + + ARMNN_LOG(info) << "Tuning time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n"; + + return state; +} +#endif
\ No newline at end of file |