-rw-r--r--  tests/ExecuteNetwork/ExecuteNetwork.cpp                | 19
-rw-r--r--  tests/InferenceModel.hpp                               |  9
-rw-r--r--  tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp  | 13
3 files changed, 31 insertions, 10 deletions
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index e9811d523a..a59f58074b 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -97,6 +97,8 @@ int main(int argc, const char* argv[])
              "Enables built in profiler. If unset, defaults to off.")
             ("visualize-optimized-model,v", po::bool_switch()->default_value(false),
              "Enables built optimized model visualizer. If unset, defaults to off.")
+            ("bf16-turbo-mode", po::bool_switch()->default_value(false), "If this option is enabled, FP32 layers, "
+             "weights and biases will be converted to BFloat16 where the backend supports it")
             ("fp16-turbo-mode,h", po::bool_switch()->default_value(false), "If this option is enabled, FP32 layers, "
              "weights and biases will be converted to FP16 where the backend supports it")
             ("threshold-time,r", po::value<double>(&thresholdTime)->default_value(0.0),
@@ -158,6 +160,7 @@ int main(int argc, const char* argv[])
     bool concurrent = vm["concurrent"].as<bool>();
     bool enableProfiling = vm["event-based-profiling"].as<bool>();
     bool enableLayerDetails = vm["visualize-optimized-model"].as<bool>();
+    bool enableBf16TurboMode = vm["bf16-turbo-mode"].as<bool>();
     bool enableFp16TurboMode = vm["fp16-turbo-mode"].as<bool>();
     bool quantizeInput = vm["quantize-input"].as<bool>();
     bool dequantizeOutput = vm["dequantize-output"].as<bool>();
@@ -166,6 +169,12 @@ int main(int argc, const char* argv[])
     bool fileOnlyExternalProfiling = vm["file-only-external-profiling"].as<bool>();
     bool parseUnsupported = vm["parse-unsupported"].as<bool>();
 
+    if (enableBf16TurboMode && enableFp16TurboMode)
+    {
+        ARMNN_LOG(fatal) << "BFloat16 and Float16 turbo mode cannot be enabled at the same time.";
+        return EXIT_FAILURE;
+    }
+
     // Check whether we have to load test cases from a file.
     if (CheckOption(vm, "test-cases"))
     {
@@ -213,8 +222,8 @@ int main(int argc, const char* argv[])
             {
                 testCase.values.insert(testCase.values.begin(), executableName);
                 results.push_back(std::async(std::launch::async, RunCsvTest, std::cref(testCase), std::cref(runtime),
-                                             enableProfiling, enableFp16TurboMode, thresholdTime, printIntermediate,
-                                             enableLayerDetails, parseUnsupported));
+                                             enableProfiling, enableFp16TurboMode, enableBf16TurboMode, thresholdTime,
+                                             printIntermediate, enableLayerDetails, parseUnsupported));
             }
 
             // Check results
@@ -233,7 +242,7 @@ int main(int argc, const char* argv[])
             {
                 testCase.values.insert(testCase.values.begin(), executableName);
                 if (RunCsvTest(testCase, runtime, enableProfiling,
-                               enableFp16TurboMode, thresholdTime, printIntermediate,
+                               enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate,
                                enableLayerDetails, parseUnsupported) != EXIT_SUCCESS)
                 {
                     return EXIT_FAILURE;
@@ -280,7 +289,7 @@ int main(int argc, const char* argv[])
         return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
                        inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
-                       outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, thresholdTime,
-                       printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime);
+                       outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+                       thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime);
     }
 }
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 50b1607743..711f7687b0 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -91,6 +91,7 @@ struct Params
     bool m_IsModelBinary;
     bool m_VisualizePostOptimizationModel;
     bool m_EnableFp16TurboMode;
+    bool m_EnableBf16TurboMode;
     bool m_PrintIntermediateLayers;
     bool m_ParseUnsupported;
 
@@ -100,6 +101,7 @@ struct Params
         , m_IsModelBinary(true)
         , m_VisualizePostOptimizationModel(false)
         , m_EnableFp16TurboMode(false)
+        , m_EnableBf16TurboMode(false)
         , m_PrintIntermediateLayers(false)
         , m_ParseUnsupported(false)
     {}
@@ -328,6 +330,7 @@ public:
         std::string m_DynamicBackendsPath;
         bool m_VisualizePostOptimizationModel;
         bool m_EnableFp16TurboMode;
+        bool m_EnableBf16TurboMode;
         std::string m_Labels;
 
         std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
@@ -365,7 +368,10 @@ public:
                  "The file will have the same name as the model with the .dot extention.")
                 ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                  "If this option is enabled FP32 layers, weights and biases will be converted "
-                 "to FP16 where the backend supports it.");
+                 "to FP16 where the backend supports it.")
+                ("bf16-turbo-mode", po::value<bool>(&options.m_EnableBf16TurboMode)->default_value(false),
+                 "If this option is enabled FP32 layers, weights and biases will be converted "
+                 "to BF16 where the backend supports it.");
        }
 
        InferenceModel(const Params& params,
@@ -401,6 +407,7 @@ public:
 
             armnn::OptimizerOptions options;
             options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
+            options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
             options.m_Debug = params.m_PrintIntermediateLayers;
 
             optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index 4d996fd401..a0aeb8bc5a 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
@@ -379,6 +379,7 @@ struct ExecuteNetworkParams
     bool m_DequantizeOutput;
     bool m_EnableProfiling;
     bool m_EnableFp16TurboMode;
+    bool m_EnableBf16TurboMode;
     double m_ThresholdTime;
     bool m_PrintIntermediate;
     size_t m_SubgraphId;
@@ -424,6 +425,7 @@ int MainImpl(const ExecuteNetworkParams& params,
     inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
     inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
+    inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
 
     InferenceModel<TParser, TDataType> model(inferenceModelParams,
                                              params.m_EnableProfiling,
@@ -549,6 +551,7 @@ int RunTest(const std::string& format,
             bool dequantizeOuput,
             bool enableProfiling,
             bool enableFp16TurboMode,
+            bool enableBf16TurboMode,
             const double& thresholdTime,
             bool printIntermediate,
             const size_t subgraphId,
@@ -673,6 +676,7 @@ int RunTest(const std::string& format,
     params.m_DequantizeOutput = dequantizeOuput;
     params.m_EnableProfiling = enableProfiling;
     params.m_EnableFp16TurboMode = enableFp16TurboMode;
+    params.m_EnableBf16TurboMode = enableBf16TurboMode;
     params.m_ThresholdTime = thresholdTime;
     params.m_PrintIntermediate = printIntermediate;
     params.m_SubgraphId = subgraphId;
@@ -748,8 +752,9 @@ int RunTest(const std::string& format,
 }
 
 int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IRuntime>& runtime,
-               const bool enableProfiling, const bool enableFp16TurboMode, const double& thresholdTime,
-               const bool printIntermediate, bool enableLayerDetails = false, bool parseUnuspported = false)
+               const bool enableProfiling, const bool enableFp16TurboMode, const bool enableBf16TurboMode,
+               const double& thresholdTime, const bool printIntermediate, bool enableLayerDetails = false,
+               bool parseUnuspported = false)
 {
     IgnoreUnused(runtime);
     std::string modelFormat;
@@ -868,6 +873,6 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IRuntime>& runtime,
 
     return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
                    inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames, outputTensorFiles,
-                   dequantizeOutput, enableProfiling, enableFp16TurboMode, thresholdTime, printIntermediate, subgraphId,
-                   enableLayerDetails, parseUnuspported);
+                   dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+                   thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnuspported);
 }
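For context, a minimal sketch of what the new flag ultimately controls: as the InferenceModel.hpp hunk shows, --bf16-turbo-mode simply sets armnn::OptimizerOptions::m_ReduceFp32ToBf16 before armnn::Optimize runs. The snippet below does the same thing directly against the Arm NN C++ API; the one-layer pass-through network, the CpuAcc backend choice and the example invocation (something like ExecuteNetwork --model-format tflite-binary --model-path model.tflite --compute CpuAcc --bf16-turbo-mode) are illustrative assumptions, not taken from this patch.

// Hedged sketch: standalone equivalent of --bf16-turbo-mode. The tiny pass-through
// network and the CpuAcc backend are assumptions for illustration only; the relevant
// line is the OptimizerOptions::m_ReduceFp32ToBf16 assignment, mirroring InferenceModel.
#include <armnn/ArmNN.hpp>

#include <vector>

int main()
{
    using namespace armnn;

    IRuntime::CreationOptions runtimeOptions;
    IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

    // Trivial network: a single input connected straight to a single output.
    INetworkPtr network = INetwork::Create();
    IConnectableLayer* input  = network->AddInputLayer(0);
    IConnectableLayer* output = network->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    unsigned int dims[] = { 1, 4 };
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo(2, dims, DataType::Float32));

    // What the new command-line switch turns on (must not be combined with m_ReduceFp32ToFp16,
    // matching the check added to main() above).
    OptimizerOptions options;
    options.m_ReduceFp32ToBf16 = true;

    std::vector<BackendId> backends = { Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), options);

    return optNet ? 0 : 1;
}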