author    Ryan OShea <ryan.oshea3@arm.com>    2022-11-07 16:20:48 +0000
committer ryan.oshea3 <ryan.oshea3@arm.com>   2022-11-16 15:22:50 +0000
commit    31441595009182c985dacbedc70c41ee6664d070 (patch)
tree      248a85295aeff4022c9b395fc97748b0a0aa6b35 /include
parent    bd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove Bf16 Conversion tests
* Throw exception if the m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments explaining how to enable fast math in order to use bf16 (see the sketch after this commit message)
* Update docs to inform users to enable fast math for bf16
ExecuteNetwork changes:
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting m_ReduceFp32ToBf16 optimizer option
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
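
For context, a minimal sketch of how a user opts in to BF16 after this change. The helper name MakeBf16OptimizerOptions is hypothetical; FastMathEnabled is the Compute Library backend option the commit message refers to, and m_ReduceFp32ToBf16 is left at its default because setting it to true is now rejected.

#include <armnn/INetwork.hpp>
#include <armnn/BackendOptions.hpp>

// Hypothetical helper: instead of the removed BF16 reduction, enable the
// backend's fast-math option so the Compute Library may select BF16 kernels.
armnn::OptimizerOptions MakeBf16OptimizerOptions()
{
    armnn::OptimizerOptions options;
    // options.m_ReduceFp32ToBf16 stays false; it is now a placeholder
    // and setting it to true throws at Optimize() time.

    armnn::BackendOptions cpuAcc("CpuAcc", {{"FastMathEnabled", true}});
    options.m_ModelOptions.push_back(cpuAcc);
    return options;
}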
Diffstat (limited to 'include')
-rw-r--r--  include/armnn/BackendHelper.hpp              8
-rw-r--r--  include/armnn/INetwork.hpp                  35
-rw-r--r--  include/armnn/Types.hpp                      2
-rw-r--r--  include/armnn/backends/ILayerSupport.hpp    12
-rw-r--r--  include/armnn/backends/WorkloadData.hpp     10
-rw-r--r--  include/armnn/backends/WorkloadFactory.hpp  10
6 files changed, 12 insertions, 65 deletions
diff --git a/include/armnn/BackendHelper.hpp b/include/armnn/BackendHelper.hpp
index f78b4f80b9..25d719a5d5 100644
--- a/include/armnn/BackendHelper.hpp
+++ b/include/armnn/BackendHelper.hpp
@@ -86,14 +86,6 @@ public:
     bool IsConstantSupported(const TensorInfo& output,
                              Optional<std::string&> reasonIfUnsupported = EmptyOptional());
 
-    bool IsConvertBf16ToFp32Supported(const TensorInfo& input,
-                                      const TensorInfo& output,
-                                      Optional<std::string&> reasonIfUnsupported = EmptyOptional());
-
-    bool IsConvertFp32ToBf16Supported(const TensorInfo& input,
-                                      const TensorInfo& output,
-                                      Optional<std::string&> reasonIfUnsupported = EmptyOptional());
-
     bool IsConvertFp16ToFp32Supported(const TensorInfo& input,
                                       const TensorInfo& output,
                                       Optional<std::string&> reasonIfUnsupported = EmptyOptional());
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 687f2c3e81..2bb9ad91f3 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -152,10 +152,6 @@ struct OptimizerOptions
         , m_ExportEnabled(exportEnabled)
         , m_AllowExpandedDims(false)
     {
-        if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16)
-        {
-            throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
-        }
     }
 
     OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false,
@@ -173,10 +169,6 @@ struct OptimizerOptions
         , m_ExportEnabled(exportEnabled)
         , m_AllowExpandedDims(allowExpandedDims)
     {
-        if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16)
-        {
-            throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
-        }
     }
 
     const std::string ToString() const
@@ -216,35 +208,32 @@ struct OptimizerOptions
     /// required.
     bool m_ReduceFp32ToFp16;
 
-    // Add debug data for easier troubleshooting
+    /// Add debug data for easier troubleshooting
     bool m_Debug;
 
-    // Pass debug data to separate output files for easier troubleshooting
+    /// Pass debug data to separate output files for easier troubleshooting
     bool m_DebugToFile;
 
-    /// Reduces all Fp32 operators in the model to Bf16 for faster processing.
-    /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers
-    /// between layers that weren't in Fp32 in the first place or if the operator is not supported in Bf16.
-    /// The overhead of these conversions can lead to a slower overall performance if too many conversions are
-    /// required.
+    /// @Note This feature has been replaced by enabling Fast Math in compute library backend options.
+    /// This is currently a placeholder option
     bool m_ReduceFp32ToBf16;
 
-    // Infer output size when not available
+    /// Infer output size when not available
     ShapeInferenceMethod m_shapeInferenceMethod;
 
-    // Enable Import
+    /// Enable Import
     bool m_ImportEnabled;
 
-    // Enable Model Options
+    /// Enable Model Options
     ModelOptions m_ModelOptions;
 
-    // Enable profiling dump of the optimizer phase
+    /// Enable profiling dump of the optimizer phase
     bool m_ProfilingEnabled;
 
-    // Enable Export
+    /// Enable Export
     bool m_ExportEnabled;
 
-    // When calculating tensor sizes dimensions of size == 1 will be ignored
+    /// When calculating tensor sizes, dimensions of size == 1 will be ignored
     bool m_AllowExpandedDims;
 };
 
@@ -782,8 +771,8 @@ public:
     void ExecuteStrategy(IStrategy& strategy) const;
 
-    // Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized,
-    // the provided ModelOptions will only be used when creating a LoadedNetwork.
+    /// Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized,
+    /// the provided ModelOptions will only be used when creating a LoadedNetwork.
     IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions);
     IOptimizedNetwork(std::unique_ptr<Graph> graph);
     IOptimizedNetwork(std::unique_ptr<OptimizedNetworkImpl> impl);
diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
index 98229df07f..eebefa8752 100644
--- a/include/armnn/Types.hpp
+++ b/include/armnn/Types.hpp
@@ -394,9 +394,7 @@ using InferenceTimingPair = std::pair<HighResolutionClock, HighResolutionClock>;
     X(Comparison) \
     X(Concat) \
     X(Constant) \
-    X(ConvertBf16ToFp32) \
     X(ConvertFp16ToFp32) \
-    X(ConvertFp32ToBf16) \
     X(ConvertFp32ToFp16) \
     X(Convolution2d) \
     X(Debug) \
diff --git a/include/armnn/backends/ILayerSupport.hpp b/include/armnn/backends/ILayerSupport.hpp
index b7f5f0497d..92102c11ed 100644
--- a/include/armnn/backends/ILayerSupport.hpp
+++ b/include/armnn/backends/ILayerSupport.hpp
@@ -109,18 +109,6 @@ public:
     ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This method is deprecated. "
                                       "Use ABI Stable IsLayerSupported accepting LayerType argument instead.", "23.08")
-    virtual bool IsConvertBf16ToFp32Supported(const TensorInfo& input,
-                                              const TensorInfo& output,
-                                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
-
-    ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This method is deprecated. "
-                                      "Use ABI Stable IsLayerSupported accepting LayerType argument instead.", "23.08")
-    virtual bool IsConvertFp32ToBf16Supported(const TensorInfo& input,
-                                              const TensorInfo& output,
-                                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
-
-    ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This method is deprecated. "
-                                      "Use ABI Stable IsLayerSupported accepting LayerType argument instead.", "23.08")
     virtual bool IsConvertFp16ToFp32Supported(const TensorInfo& input,
                                               const TensorInfo& output,
                                               Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
diff --git a/include/armnn/backends/WorkloadData.hpp b/include/armnn/backends/WorkloadData.hpp
index bd2b3ecaa9..4fbb6d423a 100644
--- a/include/armnn/backends/WorkloadData.hpp
+++ b/include/armnn/backends/WorkloadData.hpp
@@ -471,16 +471,6 @@ struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor>
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
-struct ConvertBf16ToFp32QueueDescriptor : QueueDescriptor
-{
-    void Validate(const WorkloadInfo& workloadInfo) const;
-};
-
-struct ConvertFp32ToBf16QueueDescriptor : QueueDescriptor
-{
-    void Validate(const WorkloadInfo& workloadInfo) const;
-};
-
 struct ConvertFp16ToFp32QueueDescriptor : QueueDescriptor
 {
     void Validate(const WorkloadInfo& workloadInfo) const;
diff --git a/include/armnn/backends/WorkloadFactory.hpp b/include/armnn/backends/WorkloadFactory.hpp
index 4ccf1e2c7d..e69743dced 100644
--- a/include/armnn/backends/WorkloadFactory.hpp
+++ b/include/armnn/backends/WorkloadFactory.hpp
@@ -126,21 +126,11 @@ public:
     ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
         "CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
-    virtual std::unique_ptr<IWorkload> CreateConvertBf16ToFp32(const ConvertBf16ToFp32QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const;
-
-    ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
-        "CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
     virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const;
 
     ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
         "CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
-    virtual std::unique_ptr<IWorkload> CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const;
-
-    ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
-        "CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
     virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const;
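
A short sketch of the behaviour change described above, assuming the exception is raised when armnn::Optimize is called with the flag set; the network setup here is illustrative only.

#include <armnn/ArmNN.hpp>
#include <armnn/Exceptions.hpp>
#include <iostream>

int main()
{
    armnn::IRuntime::CreationOptions rtOptions;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(rtOptions);
    armnn::INetworkPtr network = armnn::INetwork::Create();

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToBf16 = true; // no longer supported after this commit

    try
    {
        armnn::Optimize(*network, {armnn::Compute::CpuRef},
                        runtime->GetDeviceSpec(), options);
    }
    catch (const armnn::InvalidArgumentException& e)
    {
        // Expected: BF16 must now be requested via the fast-math backend option.
        std::cout << "Rejected: " << e.what() << std::endl;
    }
    return 0;
}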