From f17fcd5fd016480acd85ee05c5636338a16bed08 Mon Sep 17 00:00:00 2001
From: Jan Eilers
Date: Mon, 26 Jul 2021 22:20:00 +0100
Subject: Different input data for every iteration of ExecuteNetwork

* Allows supplying different input data for every execution of a model
  when using the 'iterations' option in ExecuteNetwork
* Removes the option 'simultaneous-iterations' because its functionality
  is now covered by 'iterations'
* Adds a deprecation warning message to notify users
* Minor refactor of warning messages

Signed-off-by: Jan Eilers
Change-Id: Ib3ab0d6533f6952bfee20d098a890b653c34cc12
---
 tests/ExecuteNetwork/ExecuteNetwork.cpp       | 113 +++++++++++--
 tests/ExecuteNetwork/ExecuteNetworkParams.cpp | 179 +++++++++++----------
 .../ExecuteNetworkProgramOptions.cpp          |  51 ++++--
 3 files changed, 232 insertions(+), 111 deletions(-)

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index a9b5a3c3f4..e757d2c992 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -356,15 +371,31 @@ int MainImpl(const ExecuteNetworkParams& params,
                                                       model.GetInputQuantizationParams()) :
                                                   armnn::EmptyOptional();
 
-        for(unsigned int j = 0; j < params.m_SimultaneousIterations ; ++j)
+        if (params.m_InputTensorDataFilePaths.size() > numInputs)
+        {
+            ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
+                            << "for each input. The user provided "
+                            << params.m_InputTensorDataFilePaths.size()
+                            << " input-tensor-data file/s which will be used to fill the input/s.\n";
+        }
+
+        for(unsigned int j = 0; j < params.m_Iterations ; ++j)
         {
             std::vector<TContainer> inputDataContainers;
             for(unsigned int i = 0; i < numInputs; ++i)
             {
+                // If there are fewer input files than required for params.m_Iterations
+                // executions, we simply start over from the first input file
+                size_t inputFileIndex = j * numInputs + i;
+                if (!params.m_InputTensorDataFilePaths.empty())
+                {
+                    inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
+                }
+
                 armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
                                                         armnn::EmptyOptional() :
                                                         armnn::MakeOptional<std::string>(
-                                                            params.m_InputTensorDataFilePaths[(j * numInputs) + i]);
+                                                            params.m_InputTensorDataFilePaths.at(inputFileIndex));
 
                 unsigned int numElements = model.GetInputSize(i);
                 if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
@@ -388,7 +404,7 @@ int MainImpl(const ExecuteNetworkParams& params,
 
         const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
 
-        for (unsigned int j = 0; j < params.m_SimultaneousIterations; ++j)
+        for (unsigned int j = 0; j < params.m_Iterations; ++j)
         {
             std::vector<TContainer> outputDataContainers;
             for (unsigned int i = 0; i < numOutputs; ++i)
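Concretely, iteration j fills input i from file number (j * numInputs + i) modulo the number of files supplied. A minimal sketch of just that indexing rule, with made-up file names and counts:

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    const std::size_t numInputs  = 2;   // inputs the model expects per execution
    const std::size_t iterations = 3;   // value of the 'iterations' option
    const std::vector<std::string> files = {"in0.bin", "in1.bin", "in2.bin", "in3.bin"};

    for (std::size_t j = 0; j < iterations; ++j)
    {
        for (std::size_t i = 0; i < numInputs; ++i)
        {
            // Same modulo rule as the hunk above: once all provided files are
            // consumed, start again from the first one.
            const std::size_t inputFileIndex = (j * numInputs + i) % files.size();
            std::cout << "iteration " << j << ", input " << i
                      << " <- " << files[inputFileIndex] << "\n";
        }
    }
}
```

Iterations 0 and 1 consume in0.bin through in3.bin; iteration 2 wraps around to in0.bin and in1.bin again, which is why the validation added later in this patch insists that the file count be divisible by the number of inputs.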
"; + } + msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " + "cover each execution."; + ARMNN_LOG(info) << msg.str(); + } + // Synchronous execution if (!params.m_Concurrent) { for (size_t x = 0; x < params.m_Iterations; x++) { // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(inputs[0], outputs[0]); + auto inference_duration = model.Run(inputs[x], outputs[x]); if (params.m_GenerateTensorData) { @@ -436,13 +469,29 @@ int MainImpl(const ExecuteNetworkParams& params, for (size_t i = 0; i < numOutputs; i++) { const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i]; + + // We've made sure before that the number of output files either equals numOutputs, in which case + // we override those files when processing the results of each iteration (only the result of the + // last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = x * numOutputs + i; + if (!params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output " << i << " named: '" + << inferenceModelParams.m_OutputBindings[i] + << "' of iteration: " << x+1 << " to file: '" + << params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? "" + : params.m_OutputTensorFiles[outputFileIndex]; TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], infoOut, outputTensorFile, params.m_DequantizeOutput); - mapbox::util::apply_visitor(printer, outputs[0][i]); + mapbox::util::apply_visitor(printer, outputs[x][i]); } ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) @@ -481,7 +530,7 @@ int MainImpl(const ExecuteNetworkParams& params, // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the // LoadedNetwork with each scheduled inference having a specific priority - for (size_t i = 0; i < params.m_SimultaneousIterations; ++i) + for (size_t i = 0; i < params.m_Iterations; ++i) { std::shared_ptr cb = callbackManager.GetNewCallback(); inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]}); @@ -490,7 +539,7 @@ int MainImpl(const ExecuteNetworkParams& params, // Check the results unsigned int j = 0; - for (size_t iteration = 0; iteration < params.m_SimultaneousIterations; ++iteration) + for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration) { auto cb = callbackManager.GetNotifiedCallback(); @@ -522,10 +571,24 @@ int MainImpl(const ExecuteNetworkParams& params, const auto& infosOut = model.GetOutputBindingInfos(); for (size_t i = 0; i < numOutputs; i++) { + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the result + // of the last iteration will be stored), or there are enough + // output files for each output of each iteration. 
@@ -481,7 +530,7 @@ int MainImpl(const ExecuteNetworkParams& params,
 
             // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
             // LoadedNetwork with each scheduled inference having a specific priority
-            for (size_t i = 0; i < params.m_SimultaneousIterations; ++i)
+            for (size_t i = 0; i < params.m_Iterations; ++i)
             {
                 std::shared_ptr<armnn::experimental::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
                 inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
@@ -490,7 +539,7 @@ int MainImpl(const ExecuteNetworkParams& params,
 
             // Check the results
             unsigned int j = 0;
-            for (size_t iteration = 0; iteration < params.m_SimultaneousIterations; ++iteration)
+            for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
             {
                 auto cb = callbackManager.GetNotifiedCallback();
@@ -522,10 +571,24 @@ int MainImpl(const ExecuteNetworkParams& params,
                     const auto& infosOut = model.GetOutputBindingInfos();
                     for (size_t i = 0; i < numOutputs; i++)
                     {
+                        // We've made sure before that the number of output files either equals numOutputs, in which
+                        // case we overwrite those files when processing the results of each iteration (only the result
+                        // of the last iteration will be stored), or there are enough
+                        // output files for each output of each iteration.
+                        size_t outputFileIndex = iteration * numOutputs + i;
+                        if (!params.m_OutputTensorFiles.empty())
+                        {
+                            outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
+                            ARMNN_LOG(info) << "Writing output " << i << " named: '"
+                                            << inferenceModelParams.m_OutputBindings[i]
+                                            << "' of iteration: " << iteration+1 << " to file: '"
+                                            << params.m_OutputTensorFiles[outputFileIndex] << "'";
+                        }
+
                         const armnn::TensorInfo& infoOut = infosOut[i].second;
                         auto outputTensorFile = params.m_OutputTensorFiles.empty()
                                                 ? ""
-                                                : params.m_OutputTensorFiles[(j * numOutputs) + i];
+                                                : params.m_OutputTensorFiles[outputFileIndex];
 
                         TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
                                               infoOut,
@@ -575,12 +638,12 @@ int MainImpl(const ExecuteNetworkParams& params,
 
             ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n";
             std::vector<std::future<std::tuple<unsigned int,
                 std::chrono::duration<double, std::milli>>>> inferenceResults;
-            inferenceResults.reserve(params.m_SimultaneousIterations);
+            inferenceResults.reserve(params.m_Iterations);
 
             // Create WorkingMemHandles for each inference
             std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
-            workingMemHandles.reserve(params.m_SimultaneousIterations);
-            for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+            workingMemHandles.reserve(params.m_Iterations);
+            for (unsigned int i = 0; i < params.m_Iterations; ++i)
             {
                 workingMemHandles.push_back(model.CreateWorkingMemHandle());
             }
@@ -588,7 +651,7 @@ int MainImpl(const ExecuteNetworkParams& params,
             // Run each inference in its own thread
             // start a timer
             const auto start_time = armnn::GetTimeNow();
-            for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+            for (unsigned int i = 0; i < params.m_Iterations; ++i)
             {
                 armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
@@ -616,10 +679,23 @@ int MainImpl(const ExecuteNetworkParams& params,
                 const auto& infosOut = model.GetOutputBindingInfos();
                 for (size_t i = 0; i < numOutputs; i++)
                 {
+                    // We've made sure before that the number of output files either equals numOutputs, in which
+                    // case we overwrite those files when processing the results of each iteration (only the result
+                    // of the last iteration will be stored), or there are enough
+                    // output files for each output of each iteration.
+                    size_t outputFileIndex = j * numOutputs + i;
+                    if (!params.m_OutputTensorFiles.empty())
+                    {
+                        outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
+                        ARMNN_LOG(info) << "Writing output " << i << " named: '"
+                                        << inferenceModelParams.m_OutputBindings[i]
+                                        << "' of iteration: " << j+1 << " to file: '"
+                                        << params.m_OutputTensorFiles[outputFileIndex] << "'";
+                    }
                     const armnn::TensorInfo& infoOut = infosOut[i].second;
                     auto outputTensorFile = params.m_OutputTensorFiles.empty()
                                             ? ""
-                                            : params.m_OutputTensorFiles[(j * numOutputs) + i];
+                                            : params.m_OutputTensorFiles[outputFileIndex];
 
                     TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
                                           infoOut,
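For the std::launch::async path above, the pattern is one detached task per iteration, each bound to its own working memory handle, with the futures drained afterwards. A stripped-down, self-contained sketch of that pattern (RunOneInference is a made-up stand-in for the actual model call):

```cpp
#include <future>
#include <iostream>
#include <tuple>
#include <vector>

// Stand-in for one inference; the real code calls the model with a
// dedicated working memory handle and measures the elapsed time.
std::tuple<unsigned int, double> RunOneInference(unsigned int id)
{
    // ... run the network, return its duration in milliseconds ...
    return {id, 1.0};
}

int main()
{
    const unsigned int iterations = 4;
    std::vector<std::future<std::tuple<unsigned int, double>>> results;
    results.reserve(iterations);

    // One thread per iteration, as with std::launch::async above.
    for (unsigned int i = 0; i < iterations; ++i)
    {
        results.push_back(std::async(std::launch::async, RunOneInference, i));
    }

    // Collect the results; get() blocks until the corresponding thread is done.
    for (auto& future : results)
    {
        auto [id, ms] = future.get();
        std::cout << "inference " << id << " took " << ms << " ms\n";
    }
}
```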
"" - : params.m_OutputTensorFiles[(j * numOutputs) + i]; + : params.m_OutputTensorFiles[outputFileIndex]; TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], infoOut, @@ -683,7 +759,14 @@ int main(int argc, const char* argv[]) // Get ExecuteNetwork parameters and runtime options from command line - ProgramOptions ProgramOptions(argc, argv); + // This might throw an InvalidArgumentException if the user provided invalid inputs + ProgramOptions ProgramOptions; + try { + ProgramOptions.ParseOptions(argc, argv); + } catch (const std::exception &e){ + ARMNN_LOG(fatal) << e.what(); + return EXIT_FAILURE; + } // Create runtime std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index 4002e89eba..dcdd423246 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -110,115 +110,122 @@ void CheckClTuningParameter(const int& tuningLevel, void ExecuteNetworkParams::ValidateParams() { - // Set to true if it is preferred to throw an exception rather than use ARMNN_LOG - bool throwExc = false; - - try + if (m_DynamicBackendsPath == "") { - if (m_DynamicBackendsPath == "") + // Check compute devices are valid unless they are dynamically loaded at runtime + std::string invalidBackends; + if (!CheckRequestedBackendsAreValid(m_ComputeDevices, armnn::Optional(invalidBackends))) { - // Check compute devices are valid unless they are dynamically loaded at runtime - std::string invalidBackends; - if (!CheckRequestedBackendsAreValid(m_ComputeDevices, armnn::Optional(invalidBackends))) - { - ARMNN_LOG(fatal) << "The list of preferred devices contains invalid backend IDs: " - << invalidBackends; - } + ARMNN_LOG(fatal) << "The list of preferred devices contains invalid backend IDs: " + << invalidBackends; } + } - CheckClTuningParameter(m_TuningLevel, m_TuningPath, m_ComputeDevices); + CheckClTuningParameter(m_TuningLevel, m_TuningPath, m_ComputeDevices); - if (m_EnableBf16TurboMode && m_EnableFp16TurboMode) - { - ARMNN_LOG(fatal) << "BFloat16 and Float16 turbo mode cannot be enabled at the same time."; - } + if (m_EnableBf16TurboMode && m_EnableFp16TurboMode) + { + throw armnn::InvalidArgumentException("BFloat16 and Float16 turbo mode cannot be " + "enabled at the same time."); + } - m_IsModelBinary = IsModelBinary(m_ModelFormat); + m_IsModelBinary = IsModelBinary(m_ModelFormat); - CheckModelFormat(m_ModelFormat); + CheckModelFormat(m_ModelFormat); - // Check input tensor shapes - if ((m_InputTensorShapes.size() != 0) && - (m_InputTensorShapes.size() != m_InputNames.size())) - { - ARMNN_LOG(fatal) << "input-name and input-tensor-shape must have the same amount of elements. "; - } + // Check input tensor shapes + if ((m_InputTensorShapes.size() != 0) && + (m_InputTensorShapes.size() != m_InputNames.size())) + { + throw armnn::InvalidArgumentException("input-name and input-tensor-shape must have " + "the same amount of elements. "); + } - if (m_InputTensorDataFilePaths.size() != 0) + if (m_InputTensorDataFilePaths.size() != 0) + { + if (!ValidatePaths(m_InputTensorDataFilePaths, true)) { - if (!ValidatePaths(m_InputTensorDataFilePaths, true)) - { - ARMNN_LOG(fatal) << "One or more input data file paths are not valid. "; - } - - if (!m_Concurrent && m_InputTensorDataFilePaths.size() != m_InputNames.size()) - { - ARMNN_LOG(fatal) << "input-name and input-tensor-data must have the same amount of elements. 
"; - } - - if (m_InputTensorDataFilePaths.size() < m_SimultaneousIterations * m_InputNames.size()) - { - ARMNN_LOG(fatal) << "There is not enough input data for " << m_SimultaneousIterations << " execution."; - } - if (m_InputTensorDataFilePaths.size() > m_SimultaneousIterations * m_InputNames.size()) - { - ARMNN_LOG(fatal) << "There is more input data for " << m_SimultaneousIterations << " execution."; - } + throw armnn::InvalidArgumentException("One or more input data file paths are not valid."); } - if ((m_OutputTensorFiles.size() != 0) && - (m_OutputTensorFiles.size() != m_OutputNames.size())) + if (m_InputTensorDataFilePaths.size() < m_InputNames.size()) { - ARMNN_LOG(fatal) << "output-name and write-outputs-to-file must have the same amount of elements. "; + throw armnn::InvalidArgumentException( + fmt::format("According to the number of input names the user provided the network has {} " + "inputs. But only {} input-tensor-data file paths were provided. Each input of the " + "model is expected to be stored in it's own file.", + m_InputNames.size(), + m_InputTensorDataFilePaths.size())); } - - if ((m_OutputTensorFiles.size() != 0) - && m_OutputTensorFiles.size() != m_SimultaneousIterations * m_OutputNames.size()) + else if (m_InputTensorDataFilePaths.size() % m_InputNames.size() != 0) { - ARMNN_LOG(fatal) << "There is not enough output data for " << m_SimultaneousIterations << " execution."; + throw armnn::InvalidArgumentException( + fmt::format("According to the number of input names the user provided the network has {} " + "inputs. The user specified {} input-tensor-data file paths which is not " + "divisible by the number of inputs.", + m_InputNames.size(), + m_InputTensorDataFilePaths.size())); } + } - if (m_InputTypes.size() == 0) - { - //Defaults the value of all inputs to "float" - m_InputTypes.assign(m_InputNames.size(), "float"); - } - else if ((m_InputTypes.size() != 0) && - (m_InputTypes.size() != m_InputNames.size())) - { - ARMNN_LOG(fatal) << "input-name and input-type must have the same amount of elements."; - } + if (m_InputTypes.size() == 0) + { + //Defaults the value of all inputs to "float" + m_InputTypes.assign(m_InputNames.size(), "float"); + } + else if ((m_InputTypes.size() != 0) && + (m_InputTypes.size() != m_InputNames.size())) + { + throw armnn::InvalidArgumentException("input-name and input-type must have the same amount of elements."); + } - if (m_OutputTypes.size() == 0) - { - //Defaults the value of all outputs to "float" - m_OutputTypes.assign(m_OutputNames.size(), "float"); - } - else if ((m_OutputTypes.size() != 0) && - (m_OutputTypes.size() != m_OutputNames.size())) - { - ARMNN_LOG(fatal) << "output-name and output-type must have the same amount of elements."; - } + // Make sure that the number of input files given is divisible by the number of inputs of the model + if (!(m_InputTensorDataFilePaths.size() % m_InputNames.size() == 0)) + { + throw armnn::InvalidArgumentException( + fmt::format("The number of input-tensor-data files ({0}) is not divisible by the " + "number of inputs ({1} according to the number of input names).", + m_InputTensorDataFilePaths.size(), + m_InputNames.size())); + } - // Check that threshold time is not less than zero - if (m_ThresholdTime < 0) - { - ARMNN_LOG(fatal) << "Threshold time supplied as a command line argument is less than zero."; - } + if (m_OutputTypes.size() == 0) + { + //Defaults the value of all outputs to "float" + m_OutputTypes.assign(m_OutputNames.size(), "float"); } - catch (std::string& exc) + else if 
 
-        if (m_InputTypes.size() == 0)
-        {
-            //Defaults the value of all inputs to "float"
-            m_InputTypes.assign(m_InputNames.size(), "float");
-        }
-        else if ((m_InputTypes.size() != 0) &&
-                 (m_InputTypes.size() != m_InputNames.size()))
-        {
-            ARMNN_LOG(fatal) << "input-name and input-type must have the same amount of elements.";
-        }
+    if (m_InputTypes.size() == 0)
+    {
+        //Defaults the value of all inputs to "float"
+        m_InputTypes.assign(m_InputNames.size(), "float");
+    }
+    else if ((m_InputTypes.size() != 0) &&
+             (m_InputTypes.size() != m_InputNames.size()))
+    {
+        throw armnn::InvalidArgumentException("input-name and input-type must have the same number of elements.");
+    }
 
-        if (m_OutputTypes.size() == 0)
-        {
-            //Defaults the value of all outputs to "float"
-            m_OutputTypes.assign(m_OutputNames.size(), "float");
-        }
-        else if ((m_OutputTypes.size() != 0) &&
-                 (m_OutputTypes.size() != m_OutputNames.size()))
-        {
-            ARMNN_LOG(fatal) << "output-name and output-type must have the same amount of elements.";
-        }
+    // Make sure that the number of input files given is divisible by the number of inputs of the model
+    if (!m_InputTensorDataFilePaths.empty() &&
+        (m_InputTensorDataFilePaths.size() % m_InputNames.size() != 0))
+    {
+        throw armnn::InvalidArgumentException(
+            fmt::format("The number of input-tensor-data files ({0}) is not divisible by the "
+                        "number of inputs ({1} according to the number of input names).",
+                        m_InputTensorDataFilePaths.size(),
+                        m_InputNames.size()));
+    }
 
-        // Check that threshold time is not less than zero
-        if (m_ThresholdTime < 0)
-        {
-            ARMNN_LOG(fatal) << "Threshold time supplied as a command line argument is less than zero.";
-        }
+    if (m_OutputTypes.size() == 0)
+    {
+        //Defaults the value of all outputs to "float"
+        m_OutputTypes.assign(m_OutputNames.size(), "float");
     }
-    catch (std::string& exc)
+    else if ((m_OutputTypes.size() != 0) &&
+             (m_OutputTypes.size() != m_OutputNames.size()))
     {
-        if (throwExc)
-        {
-            throw armnn::InvalidArgumentException(exc);
-        }
-        else
-        {
-            std::cout << exc;
-            exit(EXIT_FAILURE);
+        throw armnn::InvalidArgumentException("output-name and output-type must have the same number of elements.");
+    }
+
+    // Make sure that the number of output files given is equal to the number of outputs of the model
+    // or equal to the number of outputs of the model multiplied with the number of iterations
+    if (!m_OutputTensorFiles.empty())
+    {
+        if ((m_OutputTensorFiles.size() != m_OutputNames.size()) &&
+            (m_OutputTensorFiles.size() != m_OutputNames.size() * m_Iterations))
+        {
+            auto numOutputs = m_OutputNames.size();
+            throw armnn::InvalidArgumentException(
+                fmt::format("The user provided {0} output-tensor files. The only allowed number of output-tensor "
+                            "files is the number of outputs of the network ({1} according to the number of "
+                            "output names) or the number of outputs multiplied with the number of times the "
+                            "network should be executed (NumOutputs * NumIterations = {1} * {2} = {3}).",
+                            m_OutputTensorFiles.size(),
+                            numOutputs,
+                            m_Iterations,
+                            numOutputs * m_Iterations));
         }
     }
-    // Check turbo modes
+
+    // Check that threshold time is not less than zero
+    if (m_ThresholdTime < 0)
+    {
+        throw armnn::InvalidArgumentException("Threshold time supplied as a command line argument is less than zero.");
+    }
 
     // Warn if ExecuteNetwork will generate dummy input data
     if (m_GenerateTensorData)
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index b12547f51c..6ac64ffff2 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -139,6 +139,20 @@ void CheckRequiredOptions(const cxxopts::ParseResult& result)
     }
 }
 
+void CheckForDeprecatedOptions(const cxxopts::ParseResult& result)
+{
+    if(result.count("simultaneous-iterations") > 0)
+    {
+        ARMNN_LOG(warning) << "DEPRECATED: The program option 'simultaneous-iterations' is deprecated and will be "
+                              "removed soon. Please use the option 'iterations' combined with 'concurrent' instead.";
+    }
+    if(result.count("armnn-tflite-delegate") > 0)
+    {
+        ARMNN_LOG(warning) << "DEPRECATED: The program option 'armnn-tflite-delegate' is deprecated and will be "
+                              "removed soon. Please use the option 'tflite-executor' instead.";
+    }
+}
+
 void ProgramOptions::ValidateExecuteNetworkParams()
 {
     m_ExNetParams.ValidateParams();
@@ -196,13 +210,15 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
 
         ("n,concurrent",
          "This option is for Arm NN internal asynchronous testing purposes. "
-         "By default it is set to true if thread-pool-size or simultaneous-iterations are greater than 1",
+         "False by default. If set to true, it will use std::launch::async or the Arm NN thread pool, "
+         "if 'thread-pool-size' is greater than 0, for asynchronous execution.",
          cxxopts::value<bool>(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true"))
 
         ("d,input-tensor-data",
          "Path to files containing the input data as a flat array separated by whitespace. "
-         "Several paths can be passed by separating them with a comma. If not specified, the network will be "
-         "run with dummy data (useful for profiling).",
+         "Several paths can be passed by separating them with a comma if the network has multiple inputs "
+         "or you wish to run the model multiple times with different input data using the 'iterations' option. "
+         "If not specified, the network will be run with dummy data (useful for profiling).",
          cxxopts::value<std::string>()->default_value(""))
 
         ("h,help", "Display usage information")
@@ -213,7 +229,14 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
          cxxopts::value<bool>(m_ExNetParams.m_InferOutputShape)->default_value("false")->implicit_value("true"))
 
         ("iterations",
-         "Number of iterations to run the network for, default is set to 1",
+         "Number of iterations to run the network for, default is set to 1. "
+         "If you wish to run the model with different input data for every execution you can do so by "
+         "supplying more input file paths to the 'input-tensor-data' option. "
+         "Note: The number of input files provided must be divisible by the number of inputs of the model. "
+         "e.g. Your model has 2 inputs and you supply 4 input files. If you set 'iterations' to 6, the first "
+         "iteration will consume the first two files, the second iteration the next two, and later iterations "
+         "will wrap around and reuse the files from the start. "
+         "Note: If the 'concurrent' option is enabled all iterations will be run asynchronously.",
          cxxopts::value<size_t>(m_ExNetParams.m_Iterations)->default_value("1"))
 
         ("l,dequantize-output",
@@ -282,17 +305,20 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
 
         ("D,armnn-tflite-delegate",
          "Enable Arm NN TfLite delegate. "
-         "This option is depreciated please use tflite-executor instead",
+         "DEPRECATED: This option is deprecated, please use tflite-executor instead",
          cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"))
 
         ("simultaneous-iterations",
          "Number of simultaneous iterations to async-run the network for, default is set to 1 (disabled). "
-         "When thread-pool-size is set the Arm NN thread pool is used. Otherwise std::launch::async is used.",
+         "When thread-pool-size is set the Arm NN thread pool is used. Otherwise std::launch::async is used. "
+         "DEPRECATED: This option is deprecated and will be removed soon. "
+         "Please use the option 'iterations' combined with 'concurrent' instead.",
          cxxopts::value<size_t>(m_ExNetParams.m_SimultaneousIterations)->default_value("1"))
 
         ("thread-pool-size",
          "Number of Arm NN threads to use when running the network asynchronously via the Arm NN thread pool. "
-         "The default is set to 0",
+         "The default is set to 0, which means disabled. If 'thread-pool-size' is greater than 0 the "
+         "'concurrent' option is automatically set to true.",
          cxxopts::value<size_t>(m_ExNetParams.m_ThreadPoolSize)->default_value("0"));
 
     m_CxxOptions.add_options("c) Optimization")
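The ParseOptions() hunk below folds the deprecated 'simultaneous-iterations' option into 'iterations' plus 'concurrent'. Restated as a small sketch (the struct is a made-up stand-in for ExecuteNetworkParams, not the real type):

```cpp
#include <cstddef>

// Stand-in for the relevant ExecuteNetworkParams fields.
struct ParamsSketch
{
    std::size_t m_Iterations             = 1;
    std::size_t m_SimultaneousIterations = 1;
    std::size_t m_ThreadPoolSize         = 0;
    bool        m_Concurrent             = false;
};

void ApplyDeprecatedOptions(ParamsSketch& params)
{
    // 'simultaneous-iterations N' now behaves like 'iterations N' + 'concurrent'.
    if (params.m_SimultaneousIterations > 1)
    {
        params.m_Iterations = params.m_SimultaneousIterations;
        params.m_Concurrent = true;
    }

    // A non-zero thread pool only makes sense for asynchronous execution,
    // so it implies 'concurrent' as well.
    if (params.m_ThreadPoolSize > 0)
    {
        params.m_Concurrent = true;
    }
}
```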
@@ -409,6 +435,7 @@ void ProgramOptions::ParseOptions(int ac, const char* av[])
 
     CheckRequiredOptions(m_CxxResult);
     CheckOptionDependencies(m_CxxResult);
+    CheckForDeprecatedOptions(m_CxxResult);
 
     // Some options can't be assigned directly because they need some post-processing:
     auto computeDevices = GetOptionValue<std::vector<std::string>>("compute", m_CxxResult);
@@ -453,15 +480,19 @@ void ProgramOptions::ParseOptions(int ac, const char* av[])
         throw armnn::InvalidArgumentException ("Invalid tflite-executor option");
     }
 
+    // For backwards compatibility when deprecated options are used
     if (m_ExNetParams.m_EnableDelegate)
     {
         m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate;
-        ARMNN_LOG(info) << fmt::format("armnn-tflite-delegate option is being depreciated, "
-                                       "please use tflite-executor instead.");
+    }
+    if (m_ExNetParams.m_SimultaneousIterations > 1)
+    {
+        m_ExNetParams.m_Iterations = m_ExNetParams.m_SimultaneousIterations;
+        m_ExNetParams.m_Concurrent = true;
     }
 
     // Set concurrent to true if the user expects to run inferences asynchronously
-    if (m_ExNetParams.m_SimultaneousIterations > 1 || m_ExNetParams.m_ThreadPoolSize > 0)
+    if (m_ExNetParams.m_ThreadPoolSize > 0)
     {
         m_ExNetParams.m_Concurrent = true;
     }
-- 
cgit v1.2.1