ArmNN 22.08
Fp32NetworkToBf16ConverterTests.cpp
//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <TestUtils.hpp>

#include <Optimizer.hpp>

#include <doctest/doctest.h>

TEST_SUITE("Optimizer")
{
using namespace armnn::optimizations;
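// The optimization under test, Fp32NetworkToBf16Converter, rewrites an FP32
// graph for BFloat16 execution. As the cases below verify, it only touches
// Convolution2d and FullyConnected layers: ConvertFp32ToBf16 layers are
// inserted on their data and weight inputs and constant weights are converted
// to BFloat16, while biases and layer outputs remain FP32. Graphs without
// those layer types are left unchanged.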

TEST_CASE("Fp32NetworkToBf16OptimizationNoConversionTest")
{
    armnn::Graph graph;

    const armnn::TensorInfo infoFP32({ 2, 2, 1, 3 }, armnn::DataType::Float32);

    // Create the simple test network without Conv2D/FullyConnected.
    auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(infoFP32);

    auto floor = graph.AddLayer<armnn::FloorLayer>("floor");
    floor->GetOutputSlot().SetTensorInfo(infoFP32);

    auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");

    // Connect up the layers
    input->GetOutputSlot().Connect(floor->GetInputSlot(0));
    floor->GetOutputSlot().Connect(output->GetInputSlot(0));

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
                        &IsLayerOfType<armnn::FloorLayer>, &IsLayerOfType<armnn::OutputLayer>));

    // Run the optimizer
    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToBf16Converter()));

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
                        &IsLayerOfType<armnn::FloorLayer>,
                        &IsLayerOfType<armnn::OutputLayer>));
}

TEST_CASE("Fp32NetworkToBf16OptimizationConv2DTest")
{
    armnn::Graph graph;

    const armnn::TensorInfo infoFP32({ 2, 3, 8, 1 }, armnn::DataType::Float32);

    // Create const tensor fp32 data
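    // A BFloat16 value keeps the sign, exponent and top 7 mantissa bits of the
    // FP32 bit pattern; the discarded low 16 bits are rounded to nearest. The
    // hex comments below give each value's FP32 pattern and the expected
    // rounding direction.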
    unsigned int dims[] = { 4, 2, 1, 1 };
    std::vector<float> floatWeights{ 0.0f, -1.0f,
                                     3.8f, // 0x40733333 Round down
                                     3.1055E+29f, // 0x707ADC3C Round up
                                     9.149516E-10f, // 0x307B7FFF Round down
                                     -3.8f, // 0xC0733333 Round down
                                     -3.1055E+29f, // 0xF07ADC3C Round up
                                     -9.149516E-10f // 0xB07B7FFF Round down
                                   };
    armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32, 0.0f, 0, true), floatWeights);

    // Create const bias fp32 data
    unsigned int biasDims[] {4};
    std::vector<float> floatBias{ 1.0f, 2.0f, 3.0f, 4.0f };
    armnn::ConstTensor bias(armnn::TensorInfo(1, biasDims, armnn::DataType::Float32, 0.0f, 0, true), floatBias);

    // A network with Convolution2d layer
    auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(infoFP32);

    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_BiasEnabled = true;
    auto conv = graph.AddLayer<armnn::Convolution2dLayer>(descriptor, "conv2d");
    conv->GetOutputSlot().SetTensorInfo(infoFP32);

    auto weightsLayer = graph.AddLayer<armnn::ConstantLayer>("Weights");
    weightsLayer->m_LayerOutput = std::make_shared<armnn::ScopedTensorHandle>(weights);
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());

    auto biasLayer = graph.AddLayer<armnn::ConstantLayer>("Bias");
    biasLayer->m_LayerOutput = std::make_shared<armnn::ScopedTensorHandle>(bias);
    biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());

    auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");

    // Connect up the layers
    input->GetOutputSlot().Connect(conv->GetInputSlot(0));
    weightsLayer->GetOutputSlot(0).Connect(conv->GetInputSlot(1));
    biasLayer->GetOutputSlot(0).Connect(conv->GetInputSlot(2));
    conv->GetOutputSlot().Connect(output->GetInputSlot(0));

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::Convolution2dLayer>,
                        &IsLayerOfType<armnn::OutputLayer>));

    // Run the optimizer
    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToBf16Converter(),
                                                           RedirectMembersToConstantInputs()));

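    // The pass is expected to grow the graph from 5 to 7 layers: one
    // ConvertFp32ToBf16Layer on the data input and one on the weights. The
    // bias path stays FP32, so no converter is inserted there.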
    CHECK(7 == graph.GetNumLayers());
    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::ConvertFp32ToBf16Layer>,
                        &IsLayerOfType<armnn::ConvertFp32ToBf16Layer>,
                        &IsLayerOfType<armnn::Convolution2dLayer>,
                        &IsLayerOfType<armnn::OutputLayer>));

    armnn::TensorInfo inputTensor = conv->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
    armnn::TensorInfo weightTensor = conv->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
    armnn::TensorInfo biasTensor = conv->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
    armnn::TensorInfo outputTensor = conv->GetOutputSlot(0).GetTensorInfo();
    CHECK((conv->GetDataType() == armnn::DataType::BFloat16));
    CHECK((conv->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::BFloat16));
    CHECK((conv->m_Bias->GetTensorInfo().GetDataType() == armnn::DataType::Float32));
    CHECK((inputTensor.GetDataType() == armnn::DataType::BFloat16));
    CHECK((weightTensor.GetDataType() == armnn::DataType::BFloat16));
    CHECK((biasTensor.GetDataType() == armnn::DataType::Float32));
    CHECK((outputTensor.GetDataType() == armnn::DataType::Float32));

    // Check whether data matches expected Bf16 data
    const armnn::BFloat16* data = conv->m_Weight->GetConstTensor<armnn::BFloat16>();
    CHECK(data[0] == armnn::BFloat16(0.0f));
    CHECK(data[1] == armnn::BFloat16(-1.0f));
    CHECK(data[2] == armnn::BFloat16(3.796875f)); // 0x4073
    CHECK(data[3] == armnn::BFloat16(3.1072295E29f)); // 0x707B
    CHECK(data[4] == armnn::BFloat16(9.131327E-10f)); // 0x307B
    CHECK(data[5] == armnn::BFloat16(-3.796875f)); // 0xC073
    CHECK(data[6] == armnn::BFloat16(-3.1072295E29f)); // 0xF07B
    CHECK(data[7] == armnn::BFloat16(-9.131327E-10f)); // 0xB07B
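    // Worked example for data[2]: 3.8f has FP32 pattern 0x40733333; the low
    // half 0x3333 is below 0x8000, so the top half is kept as-is and the
    // BFloat16 result is 0x4073 == 3.796875. For data[3], 0x707ADC3C has low
    // half 0xDC3C above 0x8000, so 0x707A rounds up to 0x707B.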
}

TEST_CASE("Fp32NetworkToBf16OptimizationFullyConnectedTest")
{
    armnn::Graph graph;

    const armnn::TensorInfo infoFP32({ 2, 3, 8, 1 }, armnn::DataType::Float32);

    // Create const tensor fp32 data
    unsigned int dims[] = { 4, 2, 1, 1 };
    std::vector<float> floatWeights{ 0.0f, -1.0f,
                                     3.8f, // 0x40733333 Round down
                                     3.1055E+29f, // 0x707ADC3C Round up
                                     9.149516E-10f, // 0x307B7FFF Round down
                                     -3.8f, // 0xC0733333 Round down
                                     -3.1055E+29f, // 0xF07ADC3C Round up
                                     -9.149516E-10f // 0xB07B7FFF Round down
                                   };
    armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32, 0.0f, 0, true), floatWeights);

    // Create const bias fp32 data
    unsigned int biasDims[] {4};
    std::vector<float> floatBias{ 1.0f, 2.0f, 3.0f, 4.0f };
    armnn::ConstTensor bias(armnn::TensorInfo(1, biasDims, armnn::DataType::Float32, 0.0f, 0, true), floatBias);

    // A network with FullyConnected layer
    auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(infoFP32);

    armnn::FullyConnectedDescriptor descriptor;
    descriptor.m_BiasEnabled = true;

    auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(descriptor, "fully");
    fc->GetOutputSlot().SetTensorInfo(infoFP32);

    auto weightsLayer = graph.AddLayer<armnn::ConstantLayer>("Weights");
    weightsLayer->m_LayerOutput = std::make_shared<armnn::ScopedTensorHandle>(weights);
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());

    auto biasLayer = graph.AddLayer<armnn::ConstantLayer>("Bias");
    biasLayer->m_LayerOutput = std::make_shared<armnn::ScopedTensorHandle>(bias);
    biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());

    auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");

    // Connect up the layers
    input->GetOutputSlot().Connect(fc->GetInputSlot(0));
    weightsLayer->GetOutputSlot(0).Connect(fc->GetInputSlot(1));
    biasLayer->GetOutputSlot(0).Connect(fc->GetInputSlot(2));
    fc->GetOutputSlot().Connect(output->GetInputSlot(0));

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::FullyConnectedLayer>,
                        &IsLayerOfType<armnn::OutputLayer>));

    // Run the optimizer
    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToBf16Converter(),
                                                           RedirectMembersToConstantInputs()));

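    // Mirrors the Conv2D expectations: two converter layers are inserted and
    // the FullyConnected weights are converted, while the bias stays FP32.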
    CHECK(7 == graph.GetNumLayers());
    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::ConstantLayer>,
                        &IsLayerOfType<armnn::ConvertFp32ToBf16Layer>,
                        &IsLayerOfType<armnn::ConvertFp32ToBf16Layer>,
                        &IsLayerOfType<armnn::FullyConnectedLayer>,
                        &IsLayerOfType<armnn::OutputLayer>));

    armnn::TensorInfo inputTensor = fc->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
    armnn::TensorInfo weightTensor = fc->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo();
    armnn::TensorInfo biasTensor = fc->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
    armnn::TensorInfo outputTensor = fc->GetOutputSlot(0).GetTensorInfo();
    CHECK((fc->GetDataType() == armnn::DataType::BFloat16));
    CHECK((fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::BFloat16));
    CHECK((fc->m_Bias->GetTensorInfo().GetDataType() == armnn::DataType::Float32));
    CHECK((inputTensor.GetDataType() == armnn::DataType::BFloat16));
    CHECK((weightTensor.GetDataType() == armnn::DataType::BFloat16));
    CHECK((biasTensor.GetDataType() == armnn::DataType::Float32));
    CHECK((outputTensor.GetDataType() == armnn::DataType::Float32));

    // Check whether data matches expected Bf16 data
    const armnn::BFloat16* data = fc->m_Weight->GetConstTensor<armnn::BFloat16>();
    CHECK(data[0] == armnn::BFloat16(0.0f));
    CHECK(data[1] == armnn::BFloat16(-1.0f));
    CHECK(data[2] == armnn::BFloat16(3.796875f)); // 0x4073
    CHECK(data[3] == armnn::BFloat16(3.1072295E29f)); // 0x707B
    CHECK(data[4] == armnn::BFloat16(9.131327E-10f)); // 0x307B
    CHECK(data[5] == armnn::BFloat16(-3.796875f)); // 0xC073
    CHECK(data[6] == armnn::BFloat16(-3.1072295E29f)); // 0xF07B
    CHECK(data[7] == armnn::BFloat16(-9.131327E-10f)); // 0xB07B
}

}