-rw-r--r--  include/armnn/backends/IBackendInternal.hpp           |  9
-rw-r--r--  src/backends/backendsCommon/IBackendInternal.cpp      | 42
-rw-r--r--  tests/ExecuteNetwork/ExecuteNetwork.cpp               | 13
-rw-r--r--  tests/InferenceModel.hpp                              | 13
-rw-r--r--  tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp | 13
5 files changed, 67 insertions(+), 23 deletions(-)
diff --git a/include/armnn/backends/IBackendInternal.hpp b/include/armnn/backends/IBackendInternal.hpp
index 4815529d6d..5f1b413d83 100644
--- a/include/armnn/backends/IBackendInternal.hpp
+++ b/include/armnn/backends/IBackendInternal.hpp
@@ -143,14 +143,7 @@ public:
 
     virtual ILayerSupportSharedPtr GetLayerSupport() const = 0;
 
-    virtual ILayerSupportSharedPtr GetLayerSupport(const ModelOptions& modelOptions) const
-    {
-        if (modelOptions.empty())
-        {
-            return GetLayerSupport();
-        }
-        return GetLayerSupport(modelOptions);
-    }
+    virtual ILayerSupportSharedPtr GetLayerSupport(const ModelOptions& modelOptions) const;
 
     virtual OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph) const;
 
diff --git a/src/backends/backendsCommon/IBackendInternal.cpp b/src/backends/backendsCommon/IBackendInternal.cpp
index 08060331e7..81fc515b98 100644
--- a/src/backends/backendsCommon/IBackendInternal.cpp
+++ b/src/backends/backendsCommon/IBackendInternal.cpp
@@ -43,22 +43,36 @@ IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
     const IMemoryManagerSharedPtr& memoryManager,
     const ModelOptions& modelOptions) const
 {
-    if(modelOptions.empty())
+    if (!modelOptions.empty())
     {
-        return CreateWorkloadFactory(memoryManager);
+        for (auto optionsGroup : modelOptions)
+        {
+            if (optionsGroup.GetBackendId() == GetId())
+            {
+                return IWorkloadFactoryPtr{};
+            }
+        }
     }
-    return IWorkloadFactoryPtr{};
+
+    return CreateWorkloadFactory(memoryManager);
 }
 
 IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
     class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
     const ModelOptions& modelOptions) const
 {
-    if(modelOptions.empty())
+    if (!modelOptions.empty())
     {
-        return CreateWorkloadFactory(tensorHandleFactoryRegistry);
+        for (auto optionsGroup : modelOptions)
+        {
+            if (optionsGroup.GetBackendId() == GetId())
+            {
+                return IWorkloadFactoryPtr{};
+            }
+        }
     }
-    return IWorkloadFactoryPtr{};
+
+    return CreateWorkloadFactory(tensorHandleFactoryRegistry);
 }
 
 IBackendInternal::IBackendContextPtr IBackendInternal::CreateBackendContext(const IRuntime::CreationOptions&) const
@@ -78,6 +92,22 @@ IBackendInternal::IBackendProfilingContextPtr IBackendInternal::CreateBackendPro
     return IBackendProfilingContextPtr{};
 }
 
+IBackendInternal::ILayerSupportSharedPtr IBackendInternal::GetLayerSupport(const ModelOptions& modelOptions) const
+{
+    if (!modelOptions.empty())
+    {
+        for (auto optionsGroup : modelOptions)
+        {
+            if (optionsGroup.GetBackendId() == GetId())
+            {
+                return ILayerSupportSharedPtr{};
+            }
+        }
+    }
+
+    return GetLayerSupport();
+}
+
 // Default implementation of OptimizeSubgraphView for backward compatibility with the old API.
 // Override this method with a custom optimization implementation.
 OptimizationViews IBackendInternal::OptimizeSubgraphView(const SubgraphView& subgraph) const
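All three new default implementations follow the same pattern: scan the ModelOptions for an options group addressed to this backend, and fall back to the legacy overload only when none is found. The sketch below shows the same scan as a free function that a concrete backend could reuse when it overrides GetLayerSupport(const ModelOptions&). It is not part of the patch: GetFastMathSetting is a hypothetical helper, and the GetOptionCount()/GetOption()/IsBool()/AsBool() accessors are assumed from armnn::BackendOptions' public interface.

// Hypothetical helper (not in the patch): extract the "FastMathEnabled" flag
// addressed to a given backend from a ModelOptions list, which is a
// std::vector<armnn::BackendOptions>.
#include <armnn/BackendId.hpp>
#include <armnn/BackendOptions.hpp>
#include <cstddef>

bool GetFastMathSetting(const armnn::ModelOptions& modelOptions,
                        const armnn::BackendId& backend)
{
    for (const auto& optionsGroup : modelOptions)
    {
        if (optionsGroup.GetBackendId() != backend)
        {
            continue; // options group addressed to a different backend
        }
        for (std::size_t i = 0; i < optionsGroup.GetOptionCount(); ++i)
        {
            const auto& option = optionsGroup.GetOption(i);
            if (option.GetName() == "FastMathEnabled" && option.GetValue().IsBool())
            {
                return option.GetValue().AsBool();
            }
        }
    }
    return false; // fast math defaults to off
}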
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index f2763a72b7..5924348763 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -137,7 +137,9 @@ int main(int argc, const char* argv[])
             "Add unsupported operators as stand-in layers (where supported by parser)")
            ("infer-output-shape", po::bool_switch()->default_value(false),
             "Infers output tensor shape from input tensor shape and validate where applicable (where supported by "
-             "parser)");
+             "parser)")
+            ("enable_fast_math", po::bool_switch()->default_value(false),
+             "Enable fast_math computation of Convolution2D operator where applicable (where supported by backend)");
     }
     catch (const std::exception& e)
     {
@@ -187,6 +189,7 @@ int main(int argc, const char* argv[])
     bool parseUnsupported = vm["parse-unsupported"].as<bool>();
     bool timelineEnabled = vm["timeline-profiling"].as<bool>();
     bool inferOutputShape = vm["infer-output-shape"].as<bool>();
+    bool enableFastMath = vm["enable_fast_math"].as<bool>();
 
     if (enableBf16TurboMode && enableFp16TurboMode)
     {
@@ -250,7 +253,7 @@ int main(int argc, const char* argv[])
             results.push_back(std::async(std::launch::async, RunCsvTest, std::cref(testCase), std::cref(runtime),
                                          enableProfiling, enableFp16TurboMode, enableBf16TurboMode, thresholdTime,
                                          printIntermediate, enableLayerDetails, parseUnsupported,
-                                         inferOutputShape));
+                                         inferOutputShape, enableFastMath));
         }
 
         // Check results
@@ -270,7 +273,7 @@ int main(int argc, const char* argv[])
             testCase.values.insert(testCase.values.begin(), executableName);
             if (RunCsvTest(testCase, runtime, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
                            thresholdTime, printIntermediate,
-                           enableLayerDetails, parseUnsupported, inferOutputShape) != EXIT_SUCCESS)
+                           enableLayerDetails, parseUnsupported, inferOutputShape, enableFastMath) != EXIT_SUCCESS)
             {
                 return EXIT_FAILURE;
             }
@@ -303,7 +306,7 @@ int main(int argc, const char* argv[])
                 dynamicBackendsPath, modelPath, inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput,
                 outputTypes, outputNames, outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode,
                 enableBf16TurboMode, thresholdTime, printIntermediate, subgraphId,
-                enableLayerDetails, parseUnsupported, inferOutputShape);
+                enableLayerDetails, parseUnsupported, inferOutputShape, enableFastMath);
         }
         ARMNN_LOG(info) << "Using tuning params: " << tuningPath << "\n";
         options.m_BackendOptions.emplace_back(
@@ -336,6 +339,6 @@ int main(int argc, const char* argv[])
                        inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
                        outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
                        thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported,
-                       inferOutputShape, iterations, runtime);
+                       inferOutputShape, enableFastMath, iterations, runtime);
     }
 }
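The new --enable_fast_math flag follows the established bool_switch idiom: the option takes no argument, reads as false when absent and true when present. A minimal standalone illustration of that idiom (not ExecuteNetwork itself; the option name matches the patch, the rest is illustrative):

// Standalone sketch of the po::bool_switch idiom used above; build with
// -lboost_program_options, then run with or without --enable_fast_math.
#include <boost/program_options.hpp>
#include <iostream>

int main(int argc, const char* argv[])
{
    namespace po = boost::program_options;

    po::options_description desc("Options");
    desc.add_options()
        ("enable_fast_math", po::bool_switch()->default_value(false),
         "Enable fast_math computation where the backend supports it");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm);

    // bool_switch stores a plain bool: false unless the flag was passed.
    std::cout << std::boolalpha
              << "enable_fast_math = " << vm["enable_fast_math"].as<bool>() << "\n";
    return 0;
}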
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 527cc64d85..0a458c8b64 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -97,6 +97,7 @@ struct Params
     bool m_PrintIntermediateLayers;
     bool m_ParseUnsupported;
     bool m_InferOutputShape;
+    bool m_EnableFastMath;
 
     Params()
         : m_ComputeDevices{}
@@ -108,6 +109,7 @@ struct Params
         , m_PrintIntermediateLayers(false)
         , m_ParseUnsupported(false)
         , m_InferOutputShape(false)
+        , m_EnableFastMath(false)
     {}
 };
 
@@ -418,6 +420,17 @@ public:
             options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
             options.m_Debug = params.m_PrintIntermediateLayers;
 
+            armnn::BackendOptions gpuAcc("GpuAcc",
+            {
+                { "FastMathEnabled", params.m_EnableFastMath }
+            });
+            armnn::BackendOptions cpuAcc("CpuAcc",
+            {
+                { "FastMathEnabled", params.m_EnableFastMath }
+            });
+            options.m_ModelOptions.push_back(gpuAcc);
+            options.m_ModelOptions.push_back(cpuAcc);
+
             const auto optimization_start_time = armnn::GetTimeNow();
 
             optNet = armnn::Optimize(*network, params.m_ComputeDevices,
                                      m_Runtime->GetDeviceSpec(), options);
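Outside of InferenceModel, a client application can request the same behaviour directly through OptimizerOptions::m_ModelOptions. A minimal sketch, assuming the Arm NN headers and libraries from this release are available; the identity network exists only to make the example self-contained:

// Client-side sketch: request fast math via OptimizerOptions::m_ModelOptions.
// Backends whose id does not match an options group simply ignore it.
#include <armnn/ArmNN.hpp>

int main()
{
    using namespace armnn;

    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());

    // Trivial identity network: one input connected straight to one output.
    INetworkPtr network = INetwork::Create();
    IConnectableLayer* input  = network->AddInputLayer(0);
    IConnectableLayer* output = network->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({1, 4}, DataType::Float32));

    // The same option groups InferenceModel now builds from m_EnableFastMath.
    OptimizerOptions options;
    options.m_ModelOptions.push_back(
        BackendOptions("GpuAcc", {{"FastMathEnabled", true}}));
    options.m_ModelOptions.push_back(
        BackendOptions("CpuAcc", {{"FastMathEnabled", true}}));

    IOptimizedNetworkPtr optNet =
        Optimize(*network, {"CpuAcc", "CpuRef"}, runtime->GetDeviceSpec(), options);
    return optNet ? 0 : 1;
}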
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index 74f02d71cb..b5652df37f 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
@@ -377,6 +377,7 @@ struct ExecuteNetworkParams
     bool m_GenerateTensorData;
     bool m_ParseUnsupported = false;
     bool m_InferOutputShape = false;
+    bool m_EnableFastMath = false;
 };
 
 template<typename TParser, typename TDataType>
@@ -400,6 +401,7 @@ int MainImpl(const ExecuteNetworkParams& params,
         inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
         inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
         inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
+        inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
 
         for(const std::string& inputName: params.m_InputNames)
         {
@@ -554,6 +556,7 @@ int RunTest(const std::string& format,
             bool enableLayerDetails = false,
             bool parseUnsupported = false,
             bool inferOutputShape = false,
+            bool enableFastMath = false,
             const size_t iterations = 1,
             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
 {
@@ -683,6 +686,7 @@ int RunTest(const std::string& format,
     params.m_GenerateTensorData = inputTensorDataFilePathsVector.empty();
     params.m_ParseUnsupported = parseUnsupported;
     params.m_InferOutputShape = inferOutputShape;
+    params.m_EnableFastMath = enableFastMath;
 
     // Warn if ExecuteNetwork will generate dummy input data
     if (params.m_GenerateTensorData)
@@ -754,7 +758,7 @@ int RunTest(const std::string& format,
 int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IRuntime>& runtime,
                const bool enableProfiling, const bool enableFp16TurboMode, const bool enableBf16TurboMode,
                const double& thresholdTime, const bool printIntermediate, bool enableLayerDetails = false,
-               bool parseUnuspported = false, bool inferOutputShape = false)
+               bool parseUnuspported = false, bool inferOutputShape = false, bool enableFastMath = false)
 {
     IgnoreUnused(runtime);
     std::string modelFormat;
@@ -875,7 +879,7 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IR
                      inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
                      outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
                      thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnuspported,
-                     inferOutputShape);
+                     inferOutputShape, enableFastMath);
 }
 
 #if defined(ARMCOMPUTECL_ENABLED)
@@ -902,7 +906,8 @@ int RunCLTuning(const std::string& tuningPath,
                 const size_t subgraphId,
                 bool enableLayerDetails = false,
                 bool parseUnsupported = false,
-                bool inferOutputShape = false)
+                bool inferOutputShape = false,
+                bool enableFastMath = false)
 {
     armnn::IRuntime::CreationOptions options;
     options.m_BackendOptions.emplace_back(
@@ -925,7 +930,7 @@ int RunCLTuning(const std::string& tuningPath,
                          inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
                          outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
                          thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported,
-                         inferOutputShape, 1, runtime);
+                         inferOutputShape, enableFastMath, 1, runtime);
 
     ARMNN_LOG(info) << "Tuning time: " << std::setprecision(2)
                     << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";