diff options
author | Ryan OShea <ryan.oshea3@arm.com> | 2022-11-07 16:20:48 +0000 |
---|---|---|
committer | ryan.oshea3 <ryan.oshea3@arm.com> | 2022-11-16 15:22:50 +0000 |
commit | 31441595009182c985dacbedc70c41ee6664d070 (patch) | |
tree | 248a85295aeff4022c9b395fc97748b0a0aa6b35 /include/armnn/INetwork.hpp | |
parent | bd18eab07a8f30492de1e462b1815189014cb8d5 (diff) | |
download | armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz |
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove Bf16 Conversion tests
* Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments to enable fast math in order to use bf16
* Update docs to inform users to enable fast math for bf16
Execute Network Changes
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting m_ReduceFp32ToBf16 optimizer option
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
Diffstat (limited to 'include/armnn/INetwork.hpp')
-rw-r--r-- | include/armnn/INetwork.hpp | 35 |
1 files changed, 12 insertions, 23 deletions
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 687f2c3e81..2bb9ad91f3 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -152,10 +152,6 @@ struct OptimizerOptions , m_ExportEnabled(exportEnabled) , m_AllowExpandedDims(false) { - if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16) - { - throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time."); - } } OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false, @@ -173,10 +169,6 @@ struct OptimizerOptions , m_ExportEnabled(exportEnabled) , m_AllowExpandedDims(allowExpandedDims) { - if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16) - { - throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time."); - } } const std::string ToString() const @@ -216,35 +208,32 @@ struct OptimizerOptions /// required. bool m_ReduceFp32ToFp16; - // Add debug data for easier troubleshooting + /// Add debug data for easier troubleshooting bool m_Debug; - // Pass debug data to separate output files for easier troubleshooting + /// Pass debug data to separate output files for easier troubleshooting bool m_DebugToFile; - /// Reduces all Fp32 operators in the model to Bf16 for faster processing. - /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers - /// between layers that weren't in Fp32 in the first place or if the operator is not supported in Bf16. - /// The overhead of these conversions can lead to a slower overall performance if too many conversions are - /// required. + /// @Note This feature has been replaced by enabling Fast Math in compute library backend options. 
+ /// This is currently a placeholder option bool m_ReduceFp32ToBf16; - // Infer output size when not available + /// Infer output size when not available ShapeInferenceMethod m_shapeInferenceMethod; - // Enable Import + /// Enable Import bool m_ImportEnabled; - // Enable Model Options + /// Enable Model Options ModelOptions m_ModelOptions; - // Enable profiling dump of the optimizer phase + /// Enable profiling dump of the optimizer phase bool m_ProfilingEnabled; - // Enable Export + /// Enable Export bool m_ExportEnabled; - // When calculating tensor sizes dimensions of size == 1 will be ignored + /// When calculating tensor sizes, dimensions of size == 1 will be ignored bool m_AllowExpandedDims; }; @@ -782,8 +771,8 @@ public: void ExecuteStrategy(IStrategy& strategy) const; - // Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized, - // the provided ModelOptions will only be used when creating a LoadedNetwork. + /// Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized, + /// the provided ModelOptions will only be used when creating a LoadedNetwork. IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions); IOptimizedNetwork(std::unique_ptr<Graph> graph); IOptimizedNetwork(std::unique_ptr<OptimizedNetworkImpl> impl); |