author     Ryan OShea <ryan.oshea3@arm.com>        2022-11-07 16:20:48 +0000
committer  ryan.oshea3 <ryan.oshea3@arm.com>       2022-11-16 15:22:50 +0000
commit     31441595009182c985dacbedc70c41ee6664d070 (patch)
tree       248a85295aeff4022c9b395fc97748b0a0aa6b35 /include/armnn
parent     bd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
download   armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove Bf16 Conversion tests
- Throw an exception if the m_ReduceFp32ToBf16 optimizer option is set to true
- Provide comments on enabling fast math in order to use bf16
- Update docs to inform users to enable fast math for bf16

ExecuteNetwork changes:
- Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting of the m_ReduceFp32ToBf16 optimizer option

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
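With this commit, the bf16 path goes through the backend's fast-math option rather than the removed m_ReduceFp32ToBf16 flag. As a rough sketch of that replacement workflow (illustrative, not part of this commit; `network` is assumed to be an INetworkPtr built elsewhere):

    #include <armnn/ArmNN.hpp>

    armnn::IRuntime::CreationOptions runtimeOptions;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(runtimeOptions);

    armnn::OptimizerOptions optimizerOptions;
    // m_ReduceFp32ToBf16 is now a placeholder; bf16 kernels are instead
    // selected by the backend when fast math is enabled for it:
    optimizerOptions.m_ModelOptions.push_back(
        armnn::BackendOptions("CpuAcc", {{"FastMathEnabled", true}}));

    armnn::IOptimizedNetworkPtr optimizedNet =
        armnn::Optimize(*network, {armnn::Compute::CpuAcc},
                        runtime->GetDeviceSpec(), optimizerOptions);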
Diffstat (limited to 'include/armnn')
-rw-r--r--  include/armnn/BackendHelper.hpp              8
-rw-r--r--  include/armnn/INetwork.hpp                  35
-rw-r--r--  include/armnn/Types.hpp                      2
-rw-r--r--  include/armnn/backends/ILayerSupport.hpp    12
-rw-r--r--  include/armnn/backends/WorkloadData.hpp     10
-rw-r--r--  include/armnn/backends/WorkloadFactory.hpp  10
6 files changed, 12 insertions, 65 deletions
diff --git a/include/armnn/BackendHelper.hpp b/include/armnn/BackendHelper.hpp
index f78b4f80b9..25d719a5d5 100644
--- a/include/armnn/BackendHelper.hpp
+++ b/include/armnn/BackendHelper.hpp
@@ -86,14 +86,6 @@ public:
bool IsConstantSupported(const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional());
- bool IsConvertBf16ToFp32Supported(const TensorInfo& input,
- const TensorInfo& output,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional());
-
- bool IsConvertFp32ToBf16Supported(const TensorInfo& input,
- const TensorInfo& output,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional());
-
bool IsConvertFp16ToFp32Supported(const TensorInfo& input,
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional());
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 687f2c3e81..2bb9ad91f3 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -152,10 +152,6 @@ struct OptimizerOptions
, m_ExportEnabled(exportEnabled)
, m_AllowExpandedDims(false)
{
- if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16)
- {
- throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
- }
}
OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false,
@@ -173,10 +169,6 @@ struct OptimizerOptions
, m_ExportEnabled(exportEnabled)
, m_AllowExpandedDims(allowExpandedDims)
{
- if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16)
- {
- throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
- }
}
const std::string ToString() const
@@ -216,35 +208,32 @@ struct OptimizerOptions
/// required.
bool m_ReduceFp32ToFp16;
- // Add debug data for easier troubleshooting
+ /// Add debug data for easier troubleshooting
bool m_Debug;
- // Pass debug data to separate output files for easier troubleshooting
+ /// Pass debug data to separate output files for easier troubleshooting
bool m_DebugToFile;
- /// Reduces all Fp32 operators in the model to Bf16 for faster processing.
- /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers
- /// between layers that weren't in Fp32 in the first place or if the operator is not supported in Bf16.
- /// The overhead of these conversions can lead to a slower overall performance if too many conversions are
- /// required.
+ /// @Note This feature has been replaced by enabling Fast Math in compute library backend options.
+ /// This is currently a placeholder option
bool m_ReduceFp32ToBf16;
- // Infer output size when not available
+ /// Infer output size when not available
ShapeInferenceMethod m_shapeInferenceMethod;
- // Enable Import
+ /// Enable Import
bool m_ImportEnabled;
- // Enable Model Options
+ /// Enable Model Options
ModelOptions m_ModelOptions;
- // Enable profiling dump of the optimizer phase
+ /// Enable profiling dump of the optimizer phase
bool m_ProfilingEnabled;
- // Enable Export
+ /// Enable Export
bool m_ExportEnabled;
- // When calculating tensor sizes dimensions of size == 1 will be ignored
+ /// When calculating tensor sizes, dimensions of size == 1 will be ignored
bool m_AllowExpandedDims;
};
@@ -782,8 +771,8 @@ public:
void ExecuteStrategy(IStrategy& strategy) const;
- // Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized,
- // the provided ModelOptions will only be used when creating a LoadedNetwork.
+ /// Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized,
+ /// the provided ModelOptions will only be used when creating a LoadedNetwork.
IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions);
IOptimizedNetwork(std::unique_ptr<Graph> graph);
IOptimizedNetwork(std::unique_ptr<OptimizedNetworkImpl> impl);
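With the constructor checks above removed, the guard against m_ReduceFp32ToBf16 moves out of OptimizerOptions; per the commit message, setting the flag now throws. A hedged sketch of what a caller should expect (the exact throw site inside armnn::Optimize is assumed; `network` and `runtime` as in the earlier sketch):

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToBf16 = true; // placeholder only, no longer honoured

    try
    {
        auto optNet = armnn::Optimize(*network, {armnn::Compute::CpuAcc},
                                      runtime->GetDeviceSpec(), options);
    }
    catch (const armnn::InvalidArgumentException&)
    {
        // Expected path: clear the flag and request FastMathEnabled via
        // m_ModelOptions instead (see the sketch under the commit message).
    }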
diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
index 98229df07f..eebefa8752 100644
--- a/include/armnn/Types.hpp
+++ b/include/armnn/Types.hpp
@@ -394,9 +394,7 @@ using InferenceTimingPair = std::pair<HighResolutionClock, HighResolutionClock>;
X(Comparison) \
X(Concat) \
X(Constant) \
- X(ConvertBf16ToFp32) \
X(ConvertFp16ToFp32) \
- X(ConvertFp32ToBf16) \
X(ConvertFp32ToFp16) \
X(Convolution2d) \
X(Debug) \
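Removing the two X(...) entries above drops the corresponding enumerators from LayerType, because Types.hpp generates the enum from this list with an X-macro. A minimal sketch of the mechanism, simplified from the actual header:

    // Each X(name) row becomes one enumerator when X is defined as below.
    #define LIST_OF_LAYER_TYPE \
        X(Concat) \
        X(Constant) \
        X(ConvertFp16ToFp32) \
        X(ConvertFp32ToFp16)

    enum class LayerType
    {
    #define X(name) name,
        LIST_OF_LAYER_TYPE
    #undef X
    };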
diff --git a/include/armnn/backends/ILayerSupport.hpp b/include/armnn/backends/ILayerSupport.hpp
index b7f5f0497d..92102c11ed 100644
--- a/include/armnn/backends/ILayerSupport.hpp
+++ b/include/armnn/backends/ILayerSupport.hpp
@@ -109,18 +109,6 @@ public:
ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This method is deprecated. "
"Use ABI Stable IsLayerSupported accepting LayerType argument instead.", "23.08")
- virtual bool IsConvertBf16ToFp32Supported(const TensorInfo& input,
- const TensorInfo& output,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
-
- ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This method is deprecated. "
- "Use ABI Stable IsLayerSupported accepting LayerType argument instead.", "23.08")
- virtual bool IsConvertFp32ToBf16Supported(const TensorInfo& input,
- const TensorInfo& output,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
-
- ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This method is deprecated. "
- "Use ABI Stable IsLayerSupported accepting LayerType argument instead.", "23.08")
virtual bool IsConvertFp16ToFp32Supported(const TensorInfo& input,
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
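The deprecation messages in this hunk point at the ABI-stable overload that takes a LayerType. A hedged usage sketch (parameter order assumed from the 22.x headers; `layerSupport` is an ILayerSupport reference obtained elsewhere):

    armnn::TensorInfo input({1, 16}, armnn::DataType::Float16);
    armnn::TensorInfo output({1, 16}, armnn::DataType::Float32);
    std::string reason;

    bool supported = layerSupport.IsLayerSupported(
        armnn::LayerType::ConvertFp16ToFp32,
        {input, output},            // infos: inputs followed by outputs
        armnn::BaseDescriptor(),    // this layer has no descriptor parameters
        armnn::EmptyOptional(),     // no LSTM params
        armnn::EmptyOptional(),     // no quantized LSTM params
        armnn::Optional<std::string&>(reason));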
diff --git a/include/armnn/backends/WorkloadData.hpp b/include/armnn/backends/WorkloadData.hpp
index bd2b3ecaa9..4fbb6d423a 100644
--- a/include/armnn/backends/WorkloadData.hpp
+++ b/include/armnn/backends/WorkloadData.hpp
@@ -471,16 +471,6 @@ struct LstmQueueDescriptor : QueueDescriptorWithParameters<LstmDescriptor>
void Validate(const WorkloadInfo& workloadInfo) const;
};
-struct ConvertBf16ToFp32QueueDescriptor : QueueDescriptor
-{
- void Validate(const WorkloadInfo& workloadInfo) const;
-};
-
-struct ConvertFp32ToBf16QueueDescriptor : QueueDescriptor
-{
- void Validate(const WorkloadInfo& workloadInfo) const;
-};
-
struct ConvertFp16ToFp32QueueDescriptor : QueueDescriptor
{
void Validate(const WorkloadInfo& workloadInfo) const;
diff --git a/include/armnn/backends/WorkloadFactory.hpp b/include/armnn/backends/WorkloadFactory.hpp
index 4ccf1e2c7d..e69743dced 100644
--- a/include/armnn/backends/WorkloadFactory.hpp
+++ b/include/armnn/backends/WorkloadFactory.hpp
@@ -126,21 +126,11 @@ public:
ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
"CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
- virtual std::unique_ptr<IWorkload> CreateConvertBf16ToFp32(const ConvertBf16ToFp32QueueDescriptor& descriptor,
- const WorkloadInfo& info) const;
-
- ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
- "CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
const WorkloadInfo& info) const;
ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
"CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
- virtual std::unique_ptr<IWorkload> CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor& descriptor,
- const WorkloadInfo& info) const;
-
- ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable "
- "CreateWorkload(LayerType, const QueueDescriptor&, const WorkloadInfo& info) instead.", "23.08")
virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const;
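As with ILayerSupport, the remaining Create* factory methods are deprecated in favour of the ABI-stable CreateWorkload overload named in the messages above. A hedged sketch of that call (`factory` is an IWorkloadFactory reference; populating the descriptor and info is normally done by the runtime):

    armnn::ConvertFp16ToFp32QueueDescriptor descriptor;
    armnn::WorkloadInfo info;
    // descriptor.m_Inputs / m_Outputs and info are filled in when a real
    // layer is lowered; left empty here for brevity.

    std::unique_ptr<armnn::IWorkload> workload =
        factory.CreateWorkload(armnn::LayerType::ConvertFp16ToFp32,
                               descriptor, info);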