//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Optimization.hpp"
#include <armnnUtils/Permute.hpp>
#include <ResolveType.hpp>

namespace armnn
{
namespace optimizations
{
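
// Fuses a ConvertFp32ToBf16 layer into the ConstantLayer that feeds it, so the
// Float32 -> BFloat16 conversion of constant data is done once at optimisation time
// rather than on every inference.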
class FuseConvertFp32ToBf16IntoConstLayers
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base  = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::ConvertFp32ToBf16);

        auto dataType = base.GetDataType();
        switch (dataType)
        {
            case DataType::Float32:
                ReplaceConvertFp32ToBf16Layer<DataType::BFloat16>(
                        graph,
                        PolymorphicDowncast<ConstantLayer*>(&base),
                        PolymorphicDowncast<ConvertFp32ToBf16Layer*>(&child));
                break;
            default:
                throw InvalidArgumentException(GetDataTypeName(dataType) +
                                               std::string(" Constant Layer cannot be fused into ") +
                                               GetDataTypeName(child.GetDataType()) +
                                               std::string(" conversion layer."));
        }
    }
protected:
    FuseConvertFp32ToBf16IntoConstLayers()  = default;
    ~FuseConvertFp32ToBf16IntoConstLayers() = default;

private:
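    // Converts the constant layer's Float32 payload to BFloat16 once, stores the converted
    // tensor back in the constant layer, and moves the conversion layer's outgoing connections
    // onto the constant layer so the now-unconnected ConvertFp32ToBf16 layer can be removed.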
    template<armnn::DataType ArmnnType,
             typename T = armnn::ResolveType<ArmnnType>>
    static void ReplaceConvertFp32ToBf16Layer(Graph& graph,
                                              ConstantLayer* constantLayer,
                                              ConvertFp32ToBf16Layer* convertFp32ToBf16layer)
    {
        IgnoreUnused(graph);
        /**
         * This optimisation finds situations where a constant set of inputs is fed into a
         * ConvertFp32ToBf16 layer. In that case we do not want the overhead of converting the
         * values on every inference; instead we convert them once and store them in the constant
         * layer, to be reused every time, as they will not change.
         */
        TensorInfo outputConvertFp32ToBf16Info = convertFp32ToBf16layer->GetOutputSlot(0).GetTensorInfo();
        std::vector<T> newValues(outputConvertFp32ToBf16Info.GetNumElements());

        armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(
                constantLayer->m_LayerOutput->GetConstTensor<float>(),
                outputConvertFp32ToBf16Info.GetNumElements(),
                newValues.data());

        TensorInfo newInfo = outputConvertFp32ToBf16Info;
        newInfo.SetConstant(true);
        ConstTensor newInput(newInfo, newValues);

        constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        // Move the connections on the convertFp32ToBf16layer output slot to the constant layer.
        // The ConvertFp32ToBf16 layer will be removed once it is left unconnected.
        convertFp32ToBf16layer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Update the constant layer's output tensor info with the converted, constant info.
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
    }
};
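
// Wires the optimisation into the graph optimiser: it runs on every connection from a
// ConstantLayer output into a ConvertFp32ToBf16Layer input.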
using FuseConversionLayersIntoConstLayers = OptimizeForConnection<ConstantLayer,
                                                                  ConvertFp32ToBf16Layer,
                                                                  FuseConvertFp32ToBf16IntoConstLayers>;

} // namespace optimizations
} // namespace armnn