//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkUtils.hpp"

#include "SubgraphViewSelector.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/BackendRegistry.hpp>

#include <boost/assert.hpp> // for BOOST_ASSERT, used below

namespace armnn
{

namespace
{

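// Replaces the tensor info of an output slot with an FP32 copy, keeping shape and
// quantization parameters unchanged.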
void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
{
    const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
    TensorInfo newTensorInfo(origTensorInfo);
    newTensorInfo.SetDataType(DataType::Float32);
    outputSlot.SetTensorInfo(newTensorInfo);
}

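// Switches every FP16 output slot of the given layer to FP32; non-FP16 outputs are left untouched.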
void ChangeOutputFp16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
        {
            UpdateOutputSlotFp16ToFp32(*outputSlot);
        }
    }
}

} // anonymous namespace

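// Inserts a ConvertFp16ToFp32Layer in front of the given layer's input slots and returns the new
// conversion layers. When expectCorrectInputType is true, only inputs currently connected to an
// FP16 tensor are wrapped; otherwise every input slot gets a conversion layer.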
std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp16ToFp32Layer before each eligible input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp16ToFp32Layer before FP16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}

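// Rewrites the given layer's FP16 outputs to FP32 and appends a ConvertFp32ToFp16Layer to every
// FP32 output slot, so that downstream consumers keep receiving FP16 data. Returns the new
// conversion layers.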
std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToFp16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update FP16 output slots to FP32 on current layer
    ChangeOutputFp16ToFp32(layer);

    // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToFp16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}

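// Inserts a DebugLayer behind each output slot of the given layer and returns the new debug
// layers. Debug layers run on CpuRef, so their backend is fixed here.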
std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<DebugLayer*> debugLayers;
    debugLayers.reserve(numOutputSlots);

    // Connect a DebugLayer to each output slot of the layer
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);

        // Include the slot index so that layers with multiple outputs get uniquely named
        // debug layers, matching the naming convention of the conversion layers above.
        const std::string debugName =
            std::string("DebugLayerAfter-") + std::to_string(slotIndex) + "-" + layer.GetNameStr();

        DebugLayer* debugLayer =
            graph.InsertNewLayer<DebugLayer>(outputSlot, debugName.c_str());

        // Set the debug layer's output tensor info to match the tensor it observes.
        BOOST_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &outputSlot);
        TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();

        debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);

        // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
        debugLayer->SetBackendId(Compute::CpuRef);

        debugLayers.emplace_back(debugLayer);
    }

    return debugLayers;
}
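
// Illustrative sketch (hypothetical caller, not part of the original file): attaching debug
// layers to a whole graph. Iterating over a snapshot avoids visiting the layers inserted along
// the way; `graph` is assumed to be a mutable armnn::Graph:
//
//     std::vector<Layer*> layers(graph.begin(), graph.end());
//     for (Layer* layerPtr : layers)
//     {
//         if (layerPtr->GetNumOutputSlots() > 0) // skip layers without outputs, e.g. OutputLayers
//         {
//             InsertDebugLayerAfter(graph, *layerPtr);
//         }
//     }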

} // namespace armnn