author    Sadik Armagan <sadik.armagan@arm.com>  2020-09-15 17:17:08 +0100
committer Sadik Armagan <sadik.armagan@arm.com>  2020-09-15 16:15:52 +0000
commit    a25886e0966a6b9433cd23595688fadb88a161b2
tree      f197d2521f7d7120a4d4397a73ca410f83a5d2de
parent    6f8699ac6e26f230a734168853c64490d70ac3bc
IVGCVSW-5317 'Add enable_fast_math Option to ExecuteNetwork'
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I4eb3e27837aea926593d49f9ccea07bab8388d5b
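With this patch, fast math can be requested from the ExecuteNetwork command line via the new --enable_fast_math switch, which defaults to false. A typical invocation might look like the following (the model and compute flags shown are illustrative of a normal ExecuteNetwork run, not part of this change):

    ExecuteNetwork --model-format tflite-binary --model-path model.tflite --compute CpuAcc --enable_fast_math

When set, the flag is forwarded to the GpuAcc and CpuAcc backends as the FastMathEnabled backend option; as the help text notes, it only takes effect where the backend supports it.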
Diffstat (limited to 'tests')
 tests/ExecuteNetwork/ExecuteNetwork.cpp               | 13 ++++++++-----
 tests/InferenceModel.hpp                              | 13 +++++++++++++
 tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp | 13 +++++++++----
 3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index f2763a72b7..5924348763 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -137,7 +137,9 @@ int main(int argc, const char* argv[])
"Add unsupported operators as stand-in layers (where supported by parser)")
("infer-output-shape", po::bool_switch()->default_value(false),
"Infers output tensor shape from input tensor shape and validate where applicable (where supported by "
- "parser)");
+ "parser)")
+ ("enable_fast_math", po::bool_switch()->default_value(false),
+ "Enable fast_math computation of Convolution2D operator where applicable (where supported by backend)");
}
catch (const std::exception& e)
{
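For context on the option-parsing hunk above: po::bool_switch() registers a flag that parses to false unless it appears on the command line, so --enable_fast_math takes no argument. A minimal standalone sketch of the same Boost.Program_options pattern (hypothetical, not part of the patch):

    #include <boost/program_options.hpp>
    #include <iostream>

    namespace po = boost::program_options;

    int main(int argc, const char* argv[])
    {
        po::options_description desc("Options");
        desc.add_options()
            ("enable_fast_math", po::bool_switch()->default_value(false),
             "Enable fast_math computation where the backend supports it");

        po::variables_map vm;
        po::store(po::parse_command_line(argc, argv, desc), vm);
        po::notify(vm);

        // bool_switch always yields a bool: false unless the flag was given.
        std::cout << "fast math: " << std::boolalpha
                  << vm["enable_fast_math"].as<bool>() << "\n";
        return 0;
    }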
@@ -187,6 +189,7 @@ int main(int argc, const char* argv[])
bool parseUnsupported = vm["parse-unsupported"].as<bool>();
bool timelineEnabled = vm["timeline-profiling"].as<bool>();
bool inferOutputShape = vm["infer-output-shape"].as<bool>();
+ bool enableFastMath = vm["enable_fast_math"].as<bool>();
if (enableBf16TurboMode && enableFp16TurboMode)
{
@@ -250,7 +253,7 @@ int main(int argc, const char* argv[])
results.push_back(std::async(std::launch::async, RunCsvTest, std::cref(testCase), std::cref(runtime),
enableProfiling, enableFp16TurboMode, enableBf16TurboMode, thresholdTime,
printIntermediate, enableLayerDetails, parseUnsupported,
- inferOutputShape));
+ inferOutputShape, enableFastMath));
}
// Check results
@@ -270,7 +273,7 @@ int main(int argc, const char* argv[])
testCase.values.insert(testCase.values.begin(), executableName);
if (RunCsvTest(testCase, runtime, enableProfiling,
enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate,
- enableLayerDetails, parseUnsupported, inferOutputShape) != EXIT_SUCCESS)
+ enableLayerDetails, parseUnsupported, inferOutputShape, enableFastMath) != EXIT_SUCCESS)
{
return EXIT_FAILURE;
}
@@ -303,7 +306,7 @@ int main(int argc, const char* argv[])
dynamicBackendsPath, modelPath, inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput,
outputTypes, outputNames, outputTensorFiles, dequantizeOutput, enableProfiling,
enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate, subgraphId,
- enableLayerDetails, parseUnsupported, inferOutputShape);
+ enableLayerDetails, parseUnsupported, inferOutputShape, enableFastMath);
}
ARMNN_LOG(info) << "Using tuning params: " << tuningPath << "\n";
options.m_BackendOptions.emplace_back(
@@ -336,6 +339,6 @@ int main(int argc, const char* argv[])
inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, inferOutputShape,
- iterations, runtime);
+ enableFastMath, iterations, runtime);
}
}
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 527cc64d85..0a458c8b64 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -97,6 +97,7 @@ struct Params
bool m_PrintIntermediateLayers;
bool m_ParseUnsupported;
bool m_InferOutputShape;
+ bool m_EnableFastMath;
Params()
: m_ComputeDevices{}
@@ -108,6 +109,7 @@ struct Params
, m_PrintIntermediateLayers(false)
, m_ParseUnsupported(false)
, m_InferOutputShape(false)
+ , m_EnableFastMath(false)
{}
};
@@ -418,6 +420,17 @@ public:
options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
options.m_Debug = params.m_PrintIntermediateLayers;
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", params.m_EnableFastMath }
+ });
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", params.m_EnableFastMath }
+ });
+ options.m_ModelOptions.push_back(gpuAcc);
+ options.m_ModelOptions.push_back(cpuAcc);
+
const auto optimization_start_time = armnn::GetTimeNow();
optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
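The hunk above is the substantive part of the change: the flag is translated into a FastMathEnabled backend option for both GpuAcc and CpuAcc and attached to the OptimizerOptions handed to armnn::Optimize. A minimal sketch of driving the same mechanism directly (the helper name and backend list are hypothetical; network construction is elided):

    #include <armnn/ArmNN.hpp>

    armnn::IOptimizedNetworkPtr OptimizeWithFastMath(armnn::INetwork& network,
                                                     armnn::IRuntime& runtime,
                                                     bool enableFastMath)
    {
        armnn::OptimizerOptions options;
        // Backends that do not implement fast math are expected to ignore
        // the option (hence "where supported by backend" in the help text).
        options.m_ModelOptions.push_back(
            armnn::BackendOptions("GpuAcc", {{"FastMathEnabled", enableFastMath}}));
        options.m_ModelOptions.push_back(
            armnn::BackendOptions("CpuAcc", {{"FastMathEnabled", enableFastMath}}));

        return armnn::Optimize(network,
                               {armnn::Compute::CpuAcc, armnn::Compute::GpuAcc},
                               runtime.GetDeviceSpec(),
                               options);
    }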
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index 74f02d71cb..b5652df37f 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
@@ -377,6 +377,7 @@ struct ExecuteNetworkParams
bool m_GenerateTensorData;
bool m_ParseUnsupported = false;
bool m_InferOutputShape = false;
+ bool m_EnableFastMath = false;
};
template<typename TParser, typename TDataType>
@@ -400,6 +401,7 @@ int MainImpl(const ExecuteNetworkParams& params,
inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
+ inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
for(const std::string& inputName: params.m_InputNames)
{
@@ -554,6 +556,7 @@ int RunTest(const std::string& format,
bool enableLayerDetails = false,
bool parseUnsupported = false,
bool inferOutputShape = false,
+ bool enableFastMath = false,
const size_t iterations = 1,
const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
{
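Note where the new RunTest parameter lands: enableFastMath is inserted ahead of the already-defaulted iterations and runtime parameters rather than appended after them, keeping the defaulted arguments in one trailing group. The cost is that callers passing iterations or runtime positionally must change in the same commit, which is exactly what the ExecuteNetwork.cpp and RunCLTuning hunks do.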
@@ -683,6 +686,7 @@ int RunTest(const std::string& format,
params.m_GenerateTensorData = inputTensorDataFilePathsVector.empty();
params.m_ParseUnsupported = parseUnsupported;
params.m_InferOutputShape = inferOutputShape;
+ params.m_EnableFastMath = enableFastMath;
// Warn if ExecuteNetwork will generate dummy input data
if (params.m_GenerateTensorData)
@@ -754,7 +758,7 @@ int RunTest(const std::string& format,
int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IRuntime>& runtime,
const bool enableProfiling, const bool enableFp16TurboMode, const bool enableBf16TurboMode,
const double& thresholdTime, const bool printIntermediate, bool enableLayerDetails = false,
- bool parseUnuspported = false, bool inferOutputShape = false)
+ bool parseUnuspported = false, bool inferOutputShape = false, bool enableFastMath = false)
{
IgnoreUnused(runtime);
std::string modelFormat;
@@ -875,7 +879,7 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IR
inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames, outputTensorFiles,
dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnuspported,
- inferOutputShape);
+ inferOutputShape, enableFastMath);
}
#if defined(ARMCOMPUTECL_ENABLED)
@@ -902,7 +906,8 @@ int RunCLTuning(const std::string& tuningPath,
const size_t subgraphId,
bool enableLayerDetails = false,
bool parseUnsupported = false,
- bool inferOutputShape = false)
+ bool inferOutputShape = false,
+ bool enableFastMath = false)
{
armnn::IRuntime::CreationOptions options;
options.m_BackendOptions.emplace_back(
@@ -925,7 +930,7 @@ int RunCLTuning(const std::string& tuningPath,
inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported,
- inferOutputShape, 1, runtime);
+ inferOutputShape, enableFastMath, 1, runtime);
ARMNN_LOG(info) << "Tuning time: " << std::setprecision(2)
<< std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";