From f17fcd5fd016480acd85ee05c5636338a16bed08 Mon Sep 17 00:00:00 2001
From: Jan Eilers
Date: Mon, 26 Jul 2021 22:20:00 +0100
Subject: Different input data for every iteration of ExecuteNetwork

* Allows supplying different input data for every execution of a model
  when using the 'iterations' option in ExecuteNetwork
* Removes the option 'simultaneous-iterations' because its functionality
  is now covered by 'iterations'
* Adds a deprecation warning message to notify users
* Minor refactor of warning messages

Signed-off-by: Jan Eilers
Change-Id: Ib3ab0d6533f6952bfee20d098a890b653c34cc12
---
 tests/ExecuteNetwork/ExecuteNetwork.cpp       | 113 +++++++++++--
 tests/ExecuteNetwork/ExecuteNetworkParams.cpp | 179 +++++++++++----------
 .../ExecuteNetworkProgramOptions.cpp          |  51 ++++--
 3 files changed, 232 insertions(+), 111 deletions(-)

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index a9b5a3c3f4..e757d2c992 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -356,15 +371,31 @@ int MainImpl(const ExecuteNetworkParams& params,
                                                       model.GetInputQuantizationParams()) :
                                                   armnn::EmptyOptional();
 
-        for(unsigned int j = 0; j < params.m_SimultaneousIterations ; ++j)
+        if (params.m_InputTensorDataFilePaths.size() > numInputs)
+        {
+            ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
+                            << "for each input. The user provided "
+                            << params.m_InputTensorDataFilePaths.size()
+                            << " input-tensor-data file/s which will be used to fill the input/s.\n";
+        }
+
+        for(unsigned int j = 0; j < params.m_Iterations ; ++j)
         {
             std::vector<TContainer> inputDataContainers;
             for(unsigned int i = 0; i < numInputs; ++i)
             {
+                // If there are fewer input files than required for params.m_Iterations
+                // executions, we simply start over from the first input file
+                size_t inputFileIndex = j * numInputs + i;
+                if (!params.m_InputTensorDataFilePaths.empty())
+                {
+                    inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
+                }
+
                 armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
                                                         armnn::EmptyOptional() :
                                                         armnn::MakeOptional<std::string>(
-                                                            params.m_InputTensorDataFilePaths[(j * numInputs) + i]);
+                                                            params.m_InputTensorDataFilePaths.at(inputFileIndex));
 
                 unsigned int numElements = model.GetInputSize(i);
                 if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
@@ -388,7 +404,7 @@ int MainImpl(const ExecuteNetworkParams& params,
 
         const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
 
-        for (unsigned int j = 0; j < params.m_SimultaneousIterations; ++j)
+        for (unsigned int j = 0; j < params.m_Iterations; ++j)
         {
             std::vector<TContainer> outputDataContainers;
             for (unsigned int i = 0; i < numOutputs; ++i)
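Concretely, iteration j fills input i from file number (j * numInputs + i) modulo the number of files supplied. A minimal sketch of just that indexing rule, with made-up file names and counts:

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    const std::size_t numInputs  = 2;   // inputs the model expects per execution
    const std::size_t iterations = 3;   // value of the 'iterations' option
    const std::vector<std::string> files = {"in0.bin", "in1.bin", "in2.bin", "in3.bin"};

    for (std::size_t j = 0; j < iterations; ++j)
    {
        for (std::size_t i = 0; i < numInputs; ++i)
        {
            // Same modulo rule as the hunk above: once all provided files are
            // consumed, start again from the first one.
            const std::size_t inputFileIndex = (j * numInputs + i) % files.size();
            std::cout << "iteration " << j << ", input " << i
                      << " <- " << files[inputFileIndex] << "\n";
        }
    }
}
```

Iterations 0 and 1 consume in0.bin through in3.bin; iteration 2 wraps around to in0.bin and in1.bin again, which is why the validation added later in this patch insists that the file count be divisible by the number of inputs.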
"; + } + msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " + "cover each execution."; + ARMNN_LOG(info) << msg.str(); + } + // Synchronous execution if (!params.m_Concurrent) { for (size_t x = 0; x < params.m_Iterations; x++) { // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(inputs[0], outputs[0]); + auto inference_duration = model.Run(inputs[x], outputs[x]); if (params.m_GenerateTensorData) { @@ -436,13 +469,29 @@ int MainImpl(const ExecuteNetworkParams& params, for (size_t i = 0; i < numOutputs; i++) { const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i]; + + // We've made sure before that the number of output files either equals numOutputs, in which case + // we override those files when processing the results of each iteration (only the result of the + // last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = x * numOutputs + i; + if (!params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output " << i << " named: '" + << inferenceModelParams.m_OutputBindings[i] + << "' of iteration: " << x+1 << " to file: '" + << params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? "" + : params.m_OutputTensorFiles[outputFileIndex]; TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], infoOut, outputTensorFile, params.m_DequantizeOutput); - mapbox::util::apply_visitor(printer, outputs[0][i]); + mapbox::util::apply_visitor(printer, outputs[x][i]); } ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) @@ -481,7 +530,7 @@ int MainImpl(const ExecuteNetworkParams& params, // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the // LoadedNetwork with each scheduled inference having a specific priority - for (size_t i = 0; i < params.m_SimultaneousIterations; ++i) + for (size_t i = 0; i < params.m_Iterations; ++i) { std::shared_ptr cb = callbackManager.GetNewCallback(); inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]}); @@ -490,7 +539,7 @@ int MainImpl(const ExecuteNetworkParams& params, // Check the results unsigned int j = 0; - for (size_t iteration = 0; iteration < params.m_SimultaneousIterations; ++iteration) + for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration) { auto cb = callbackManager.GetNotifiedCallback(); @@ -522,10 +571,24 @@ int MainImpl(const ExecuteNetworkParams& params, const auto& infosOut = model.GetOutputBindingInfos(); for (size_t i = 0; i < numOutputs; i++) { + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the result + // of the last iteration will be stored), or there are enough + // output files for each output of each iteration. 
@@ -481,7 +530,7 @@ int MainImpl(const ExecuteNetworkParams& params,
 
             // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
             // LoadedNetwork with each scheduled inference having a specific priority
-            for (size_t i = 0; i < params.m_SimultaneousIterations; ++i)
+            for (size_t i = 0; i < params.m_Iterations; ++i)
             {
                 std::shared_ptr<armnn::experimental::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
                 inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
@@ -490,7 +539,7 @@ int MainImpl(const ExecuteNetworkParams& params,
 
             // Check the results
             unsigned int j = 0;
-            for (size_t iteration = 0; iteration < params.m_SimultaneousIterations; ++iteration)
+            for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
             {
                 auto cb = callbackManager.GetNotifiedCallback();
@@ -522,10 +571,24 @@ int MainImpl(const ExecuteNetworkParams& params,
                     const auto& infosOut = model.GetOutputBindingInfos();
                     for (size_t i = 0; i < numOutputs; i++)
                     {
+                        // We've made sure before that the number of output files either equals numOutputs, in which
+                        // case we overwrite those files when processing the results of each iteration (only the result
+                        // of the last iteration will be stored), or there are enough
+                        // output files for each output of each iteration.
+                        size_t outputFileIndex = iteration * numOutputs + i;
+                        if (!params.m_OutputTensorFiles.empty())
+                        {
+                            outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
+                            ARMNN_LOG(info) << "Writing output " << i << " named: '"
+                                            << inferenceModelParams.m_OutputBindings[i]
+                                            << "' of iteration: " << iteration+1 << " to file: '"
+                                            << params.m_OutputTensorFiles[outputFileIndex] << "'";
+                        }
+
                         const armnn::TensorInfo& infoOut = infosOut[i].second;
                         auto outputTensorFile = params.m_OutputTensorFiles.empty()
                                                 ? ""
-                                                : params.m_OutputTensorFiles[(j * numOutputs) + i];
+                                                : params.m_OutputTensorFiles[outputFileIndex];
 
                         TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
                                               infoOut,
@@ -575,12 +638,12 @@ int MainImpl(const ExecuteNetworkParams& params,
 
             ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n";
             std::vector<std::future<std::tuple<unsigned int,
                 std::chrono::duration<double, std::milli>>>> inferenceResults;
-            inferenceResults.reserve(params.m_SimultaneousIterations);
+            inferenceResults.reserve(params.m_Iterations);
 
             // Create WorkingMemHandles for each inference
             std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
-            workingMemHandles.reserve(params.m_SimultaneousIterations);
-            for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+            workingMemHandles.reserve(params.m_Iterations);
+            for (unsigned int i = 0; i < params.m_Iterations; ++i)
             {
                 workingMemHandles.push_back(model.CreateWorkingMemHandle());
             }
@@ -588,7 +651,7 @@ int MainImpl(const ExecuteNetworkParams& params,
             // Run each inference in its own thread
             // start a timer
             const auto start_time = armnn::GetTimeNow();
-            for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
+            for (unsigned int i = 0; i < params.m_Iterations; ++i)
             {
                 armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
@@ -616,10 +679,23 @@ int MainImpl(const ExecuteNetworkParams& params,
                 const auto& infosOut = model.GetOutputBindingInfos();
                 for (size_t i = 0; i < numOutputs; i++)
                 {
+                    // We've made sure before that the number of output files either equals numOutputs, in which
+                    // case we overwrite those files when processing the results of each iteration (only the result
+                    // of the last iteration will be stored), or there are enough
+                    // output files for each output of each iteration.
+                    size_t outputFileIndex = j * numOutputs + i;
+                    if (!params.m_OutputTensorFiles.empty())
+                    {
+                        outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
+                        ARMNN_LOG(info) << "Writing output " << i << " named: '"
+                                        << inferenceModelParams.m_OutputBindings[i]
+                                        << "' of iteration: " << j+1 << " to file: '"
+                                        << params.m_OutputTensorFiles[outputFileIndex] << "'";
+                    }
                     const armnn::TensorInfo& infoOut = infosOut[i].second;
                     auto outputTensorFile = params.m_OutputTensorFiles.empty()
                                             ? ""
-                                            : params.m_OutputTensorFiles[(j * numOutputs) + i];
+                                            : params.m_OutputTensorFiles[outputFileIndex];
 
                     TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
                                           infoOut,
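For the std::launch::async path above, the pattern is one detached task per iteration, each bound to its own working memory handle, with the futures drained afterwards. A stripped-down, self-contained sketch of that pattern (RunOneInference is a made-up stand-in for the actual model call):

```cpp
#include <future>
#include <iostream>
#include <tuple>
#include <vector>

// Stand-in for one inference; the real code calls the model with a
// dedicated working memory handle and measures the elapsed time.
std::tuple<unsigned int, double> RunOneInference(unsigned int id)
{
    // ... run the network, return its duration in milliseconds ...
    return {id, 1.0};
}

int main()
{
    const unsigned int iterations = 4;
    std::vector<std::future<std::tuple<unsigned int, double>>> results;
    results.reserve(iterations);

    // One thread per iteration, as with std::launch::async above.
    for (unsigned int i = 0; i < iterations; ++i)
    {
        results.push_back(std::async(std::launch::async, RunOneInference, i));
    }

    // Collect the results; get() blocks until the corresponding thread is done.
    for (auto& future : results)
    {
        auto [id, ms] = future.get();
        std::cout << "inference " << id << " took " << ms << " ms\n";
    }
}
```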
"" - : params.m_OutputTensorFiles[(j * numOutputs) + i]; + : params.m_OutputTensorFiles[outputFileIndex]; TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], infoOut, @@ -683,7 +759,14 @@ int main(int argc, const char* argv[]) // Get ExecuteNetwork parameters and runtime options from command line - ProgramOptions ProgramOptions(argc, argv); + // This might throw an InvalidArgumentException if the user provided invalid inputs + ProgramOptions ProgramOptions; + try { + ProgramOptions.ParseOptions(argc, argv); + } catch (const std::exception &e){ + ARMNN_LOG(fatal) << e.what(); + return EXIT_FAILURE; + } // Create runtime std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index 4002e89eba..dcdd423246 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -110,115 +110,122 @@ void CheckClTuningParameter(const int& tuningLevel, void ExecuteNetworkParams::ValidateParams() { - // Set to true if it is preferred to throw an exception rather than use ARMNN_LOG - bool throwExc = false; - - try + if (m_DynamicBackendsPath == "") { - if (m_DynamicBackendsPath == "") + // Check compute devices are valid unless they are dynamically loaded at runtime + std::string invalidBackends; + if (!CheckRequestedBackendsAreValid(m_ComputeDevices, armnn::Optional(invalidBackends))) { - // Check compute devices are valid unless they are dynamically loaded at runtime - std::string invalidBackends; - if (!CheckRequestedBackendsAreValid(m_ComputeDevices, armnn::Optional(invalidBackends))) - { - ARMNN_LOG(fatal) << "The list of preferred devices contains invalid backend IDs: " - << invalidBackends; - } + ARMNN_LOG(fatal) << "The list of preferred devices contains invalid backend IDs: " + << invalidBackends; } + } - CheckClTuningParameter(m_TuningLevel, m_TuningPath, m_ComputeDevices); + CheckClTuningParameter(m_TuningLevel, m_TuningPath, m_ComputeDevices); - if (m_EnableBf16TurboMode && m_EnableFp16TurboMode) - { - ARMNN_LOG(fatal) << "BFloat16 and Float16 turbo mode cannot be enabled at the same time."; - } + if (m_EnableBf16TurboMode && m_EnableFp16TurboMode) + { + throw armnn::InvalidArgumentException("BFloat16 and Float16 turbo mode cannot be " + "enabled at the same time."); + } - m_IsModelBinary = IsModelBinary(m_ModelFormat); + m_IsModelBinary = IsModelBinary(m_ModelFormat); - CheckModelFormat(m_ModelFormat); + CheckModelFormat(m_ModelFormat); - // Check input tensor shapes - if ((m_InputTensorShapes.size() != 0) && - (m_InputTensorShapes.size() != m_InputNames.size())) - { - ARMNN_LOG(fatal) << "input-name and input-tensor-shape must have the same amount of elements. "; - } + // Check input tensor shapes + if ((m_InputTensorShapes.size() != 0) && + (m_InputTensorShapes.size() != m_InputNames.size())) + { + throw armnn::InvalidArgumentException("input-name and input-tensor-shape must have " + "the same amount of elements. "); + } - if (m_InputTensorDataFilePaths.size() != 0) + if (m_InputTensorDataFilePaths.size() != 0) + { + if (!ValidatePaths(m_InputTensorDataFilePaths, true)) { - if (!ValidatePaths(m_InputTensorDataFilePaths, true)) - { - ARMNN_LOG(fatal) << "One or more input data file paths are not valid. "; - } - - if (!m_Concurrent && m_InputTensorDataFilePaths.size() != m_InputNames.size()) - { - ARMNN_LOG(fatal) << "input-name and input-tensor-data must have the same amount of elements. 
"; - } - - if (m_InputTensorDataFilePaths.size() < m_SimultaneousIterations * m_InputNames.size()) - { - ARMNN_LOG(fatal) << "There is not enough input data for " << m_SimultaneousIterations << " execution."; - } - if (m_InputTensorDataFilePaths.size() > m_SimultaneousIterations * m_InputNames.size()) - { - ARMNN_LOG(fatal) << "There is more input data for " << m_SimultaneousIterations << " execution."; - } + throw armnn::InvalidArgumentException("One or more input data file paths are not valid."); } - if ((m_OutputTensorFiles.size() != 0) && - (m_OutputTensorFiles.size() != m_OutputNames.size())) + if (m_InputTensorDataFilePaths.size() < m_InputNames.size()) { - ARMNN_LOG(fatal) << "output-name and write-outputs-to-file must have the same amount of elements. "; + throw armnn::InvalidArgumentException( + fmt::format("According to the number of input names the user provided the network has {} " + "inputs. But only {} input-tensor-data file paths were provided. Each input of the " + "model is expected to be stored in it's own file.", + m_InputNames.size(), + m_InputTensorDataFilePaths.size())); } - - if ((m_OutputTensorFiles.size() != 0) - && m_OutputTensorFiles.size() != m_SimultaneousIterations * m_OutputNames.size()) + else if (m_InputTensorDataFilePaths.size() % m_InputNames.size() != 0) { - ARMNN_LOG(fatal) << "There is not enough output data for " << m_SimultaneousIterations << " execution."; + throw armnn::InvalidArgumentException( + fmt::format("According to the number of input names the user provided the network has {} " + "inputs. The user specified {} input-tensor-data file paths which is not " + "divisible by the number of inputs.", + m_InputNames.size(), + m_InputTensorDataFilePaths.size())); } + } - if (m_InputTypes.size() == 0) - { - //Defaults the value of all inputs to "float" - m_InputTypes.assign(m_InputNames.size(), "float"); - } - else if ((m_InputTypes.size() != 0) && - (m_InputTypes.size() != m_InputNames.size())) - { - ARMNN_LOG(fatal) << "input-name and input-type must have the same amount of elements."; - } + if (m_InputTypes.size() == 0) + { + //Defaults the value of all inputs to "float" + m_InputTypes.assign(m_InputNames.size(), "float"); + } + else if ((m_InputTypes.size() != 0) && + (m_InputTypes.size() != m_InputNames.size())) + { + throw armnn::InvalidArgumentException("input-name and input-type must have the same amount of elements."); + } - if (m_OutputTypes.size() == 0) - { - //Defaults the value of all outputs to "float" - m_OutputTypes.assign(m_OutputNames.size(), "float"); - } - else if ((m_OutputTypes.size() != 0) && - (m_OutputTypes.size() != m_OutputNames.size())) - { - ARMNN_LOG(fatal) << "output-name and output-type must have the same amount of elements."; - } + // Make sure that the number of input files given is divisible by the number of inputs of the model + if (!(m_InputTensorDataFilePaths.size() % m_InputNames.size() == 0)) + { + throw armnn::InvalidArgumentException( + fmt::format("The number of input-tensor-data files ({0}) is not divisible by the " + "number of inputs ({1} according to the number of input names).", + m_InputTensorDataFilePaths.size(), + m_InputNames.size())); + } - // Check that threshold time is not less than zero - if (m_ThresholdTime < 0) - { - ARMNN_LOG(fatal) << "Threshold time supplied as a command line argument is less than zero."; - } + if (m_OutputTypes.size() == 0) + { + //Defaults the value of all outputs to "float" + m_OutputTypes.assign(m_OutputNames.size(), "float"); } - catch (std::string& exc) + else if 
 
-        if (m_InputTypes.size() == 0)
-        {
-            //Defaults the value of all inputs to "float"
-            m_InputTypes.assign(m_InputNames.size(), "float");
-        }
-        else if ((m_InputTypes.size() != 0) &&
-                 (m_InputTypes.size() != m_InputNames.size()))
-        {
-            ARMNN_LOG(fatal) << "input-name and input-type must have the same amount of elements.";
-        }
+    if (m_InputTypes.size() == 0)
+    {
+        //Defaults the value of all inputs to "float"
+        m_InputTypes.assign(m_InputNames.size(), "float");
+    }
+    else if ((m_InputTypes.size() != 0) &&
+             (m_InputTypes.size() != m_InputNames.size()))
+    {
+        throw armnn::InvalidArgumentException("input-name and input-type must have the same number of elements.");
+    }
 
-        if (m_OutputTypes.size() == 0)
-        {
-            //Defaults the value of all outputs to "float"
-            m_OutputTypes.assign(m_OutputNames.size(), "float");
-        }
-        else if ((m_OutputTypes.size() != 0) &&
-                 (m_OutputTypes.size() != m_OutputNames.size()))
-        {
-            ARMNN_LOG(fatal) << "output-name and output-type must have the same amount of elements.";
-        }
+    // Make sure that the number of input files given is divisible by the number of inputs of the model
+    if (!m_InputTensorDataFilePaths.empty() &&
+        (m_InputTensorDataFilePaths.size() % m_InputNames.size() != 0))
+    {
+        throw armnn::InvalidArgumentException(
+            fmt::format("The number of input-tensor-data files ({0}) is not divisible by the "
+                        "number of inputs ({1} according to the number of input names).",
+                        m_InputTensorDataFilePaths.size(),
+                        m_InputNames.size()));
+    }
 
-        // Check that threshold time is not less than zero
-        if (m_ThresholdTime < 0)
-        {
-            ARMNN_LOG(fatal) << "Threshold time supplied as a command line argument is less than zero.";
-        }
+    if (m_OutputTypes.size() == 0)
+    {
+        //Defaults the value of all outputs to "float"
+        m_OutputTypes.assign(m_OutputNames.size(), "float");
     }
-    catch (std::string& exc)
+    else if ((m_OutputTypes.size() != 0) &&
+             (m_OutputTypes.size() != m_OutputNames.size()))
     {
-        if (throwExc)
-        {
-            throw armnn::InvalidArgumentException(exc);
-        }
-        else
-        {
-            std::cout << exc;
-            exit(EXIT_FAILURE);
+        throw armnn::InvalidArgumentException("output-name and output-type must have the same number of elements.");
+    }
+
+    // Make sure that the number of output files given is equal to the number of outputs of the model
+    // or equal to the number of outputs of the model multiplied with the number of iterations
+    if (!m_OutputTensorFiles.empty())
+    {
+        if ((m_OutputTensorFiles.size() != m_OutputNames.size()) &&
+            (m_OutputTensorFiles.size() != m_OutputNames.size() * m_Iterations))
+        {
+            auto numOutputs = m_OutputNames.size();
+            throw armnn::InvalidArgumentException(
+                fmt::format("The user provided {0} output-tensor files. The only allowed number of output-tensor "
+                            "files is the number of outputs of the network ({1} according to the number of "
+                            "output names) or the number of outputs multiplied with the number of times the "
+                            "network should be executed (NumOutputs * NumIterations = {1} * {2} = {3}).",
+                            m_OutputTensorFiles.size(),
+                            numOutputs,
+                            m_Iterations,
+                            numOutputs * m_Iterations));
         }
     }
-    // Check turbo modes
+
+    // Check that threshold time is not less than zero
+    if (m_ThresholdTime < 0)
+    {
+        throw armnn::InvalidArgumentException("Threshold time supplied as a command line argument is less than zero.");
+    }
 
     // Warn if ExecuteNetwork will generate dummy input data
     if (m_GenerateTensorData)
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index b12547f51c..6ac64ffff2 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -139,6 +139,20 @@ void CheckRequiredOptions(const cxxopts::ParseResult& result)
     }
 }
 
+void CheckForDeprecatedOptions(const cxxopts::ParseResult& result)
+{
+    if(result.count("simultaneous-iterations") > 0)
+    {
+        ARMNN_LOG(warning) << "DEPRECATED: The program option 'simultaneous-iterations' is deprecated and will be "
+                              "removed soon. Please use the option 'iterations' combined with 'concurrent' instead.";
+    }
+    if(result.count("armnn-tflite-delegate") > 0)
+    {
+        ARMNN_LOG(warning) << "DEPRECATED: The program option 'armnn-tflite-delegate' is deprecated and will be "
+                              "removed soon. Please use the option 'tflite-executor' instead.";
+    }
+}
+
 void ProgramOptions::ValidateExecuteNetworkParams()
 {
     m_ExNetParams.ValidateParams();
@@ -196,13 +210,15 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
 
         ("n,concurrent",
          "This option is for Arm NN internal asynchronous testing purposes. "
-         "By default it is set to true if thread-pool-size or simultaneous-iterations are greater than 1",
+         "False by default. If set to true, it will use std::launch::async or the Arm NN thread pool, "
+         "if 'thread-pool-size' is greater than 0, for asynchronous execution.",
          cxxopts::value<bool>(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true"))
 
         ("d,input-tensor-data",
          "Path to files containing the input data as a flat array separated by whitespace. "
-         "Several paths can be passed by separating them with a comma. If not specified, the network will be "
-         "run with dummy data (useful for profiling).",
+         "Several paths can be passed by separating them with a comma if the network has multiple inputs "
+         "or you wish to run the model multiple times with different input data using the 'iterations' option. "
+         "If not specified, the network will be run with dummy data (useful for profiling).",
          cxxopts::value<std::string>()->default_value(""))
 
         ("h,help", "Display usage information")
@@ -213,7 +229,14 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
          cxxopts::value<bool>(m_ExNetParams.m_InferOutputShape)->default_value("false")->implicit_value("true"))
 
         ("iterations",
-         "Number of iterations to run the network for, default is set to 1",
+         "Number of iterations to run the network for, default is set to 1. "
+         "If you wish to run the model with different input data for every execution you can do so by "
+         "supplying more input file paths to the 'input-tensor-data' option. "
+         "Note: The number of input files provided must be divisible by the number of inputs of the model. "
+         "e.g. Your model has 2 inputs and you supply 4 input files. If you set 'iterations' to 6, the first "
+         "iteration will consume the first two files, the second iteration the next two, and later iterations "
+         "will wrap around and reuse the files from the start. "
+         "Note: If the 'concurrent' option is enabled all iterations will be run asynchronously.",
          cxxopts::value<size_t>(m_ExNetParams.m_Iterations)->default_value("1"))
 
         ("l,dequantize-output",
@@ -282,17 +305,20 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
 
         ("D,armnn-tflite-delegate",
          "Enable Arm NN TfLite delegate. "
-         "This option is depreciated please use tflite-executor instead",
+         "DEPRECATED: This option is deprecated, please use tflite-executor instead",
          cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"))
 
         ("simultaneous-iterations",
          "Number of simultaneous iterations to async-run the network for, default is set to 1 (disabled). "
-         "When thread-pool-size is set the Arm NN thread pool is used. Otherwise std::launch::async is used.",
+         "When thread-pool-size is set the Arm NN thread pool is used. Otherwise std::launch::async is used. "
+         "DEPRECATED: This option is deprecated and will be removed soon. "
+         "Please use the option 'iterations' combined with 'concurrent' instead.",
          cxxopts::value<size_t>(m_ExNetParams.m_SimultaneousIterations)->default_value("1"))
 
         ("thread-pool-size",
          "Number of Arm NN threads to use when running the network asynchronously via the Arm NN thread pool. "
-         "The default is set to 0",
+         "The default is set to 0, which means disabled. If 'thread-pool-size' is greater than 0 the "
+         "'concurrent' option is automatically set to true.",
          cxxopts::value<size_t>(m_ExNetParams.m_ThreadPoolSize)->default_value("0"));
 
     m_CxxOptions.add_options("c) Optimization")
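The ParseOptions() hunk below folds the deprecated 'simultaneous-iterations' option into 'iterations' plus 'concurrent'. Restated as a small sketch (the struct is a made-up stand-in for ExecuteNetworkParams, not the real type):

```cpp
#include <cstddef>

// Stand-in for the relevant ExecuteNetworkParams fields.
struct ParamsSketch
{
    std::size_t m_Iterations             = 1;
    std::size_t m_SimultaneousIterations = 1;
    std::size_t m_ThreadPoolSize         = 0;
    bool        m_Concurrent             = false;
};

void ApplyDeprecatedOptions(ParamsSketch& params)
{
    // 'simultaneous-iterations N' now behaves like 'iterations N' + 'concurrent'.
    if (params.m_SimultaneousIterations > 1)
    {
        params.m_Iterations = params.m_SimultaneousIterations;
        params.m_Concurrent = true;
    }

    // A non-zero thread pool only makes sense for asynchronous execution,
    // so it implies 'concurrent' as well.
    if (params.m_ThreadPoolSize > 0)
    {
        params.m_Concurrent = true;
    }
}
```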
@@ -409,6 +435,7 @@ void ProgramOptions::ParseOptions(int ac, const char* av[])
 
     CheckRequiredOptions(m_CxxResult);
     CheckOptionDependencies(m_CxxResult);
+    CheckForDeprecatedOptions(m_CxxResult);
 
     // Some options can't be assigned directly because they need some post-processing:
     auto computeDevices = GetOptionValue<std::vector<std::string>>("compute", m_CxxResult);
@@ -453,15 +480,19 @@ void ProgramOptions::ParseOptions(int ac, const char* av[])
         throw armnn::InvalidArgumentException ("Invalid tflite-executor option");
     }
 
+    // For backwards compatibility when deprecated options are used
     if (m_ExNetParams.m_EnableDelegate)
     {
         m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate;
-        ARMNN_LOG(info) << fmt::format("armnn-tflite-delegate option is being depreciated, "
-                                       "please use tflite-executor instead.");
+    }
+    if (m_ExNetParams.m_SimultaneousIterations > 1)
+    {
+        m_ExNetParams.m_Iterations = m_ExNetParams.m_SimultaneousIterations;
+        m_ExNetParams.m_Concurrent = true;
     }
 
     // Set concurrent to true if the user expects to run inferences asynchronously
-    if (m_ExNetParams.m_SimultaneousIterations > 1 || m_ExNetParams.m_ThreadPoolSize > 0)
+    if (m_ExNetParams.m_ThreadPoolSize > 0)
     {
         m_ExNetParams.m_Concurrent = true;
     }
-- 
cgit v1.2.1