ArmNN 22.08 — source listing of ConvertConstDequantisationLayersToConstLayers.hpp
(generated documentation page; line numbers below are from the original file).
1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "Optimization.hpp"
8 #include "NetworkUtils.hpp"
9 
10 #include <armnn/Logging.hpp>
11 #include <armnnUtils/Permute.hpp>
12 
13 namespace armnn
14 {
15 namespace optimizations
16 {
17 
19 {
20 public:
// Optimizer entry point: invoked for each matched Constant -> Dequantize connection.
// 'graph' and the matched InputSlot are supplied by the optimization framework.
21  void Run(Graph& graph, InputSlot& connection) const
22  {
// 'base' is the layer producing into this slot; 'child' is the layer owning the slot.
23  Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
24  Layer& child = connection.GetOwningLayer();
25 
// NOTE(review): original lines 26-27 are missing from this extraction — presumably
// type checks/assertions guarding the downcasts below; confirm against upstream source.
28 
// Downcasts assume base is a ConstantLayer and child a DequantizeLayer, i.e. the
// pattern the enclosing OptimizeForConnection matcher selects.
29  ReplaceConstDequantisationLayer(graph,
30  PolymorphicDowncast<ConstantLayer*>(&base),
31  PolymorphicDowncast<DequantizeLayer*>(&child));
32 
33  }
34 protected:
37 private:
38 
// Folds a Constant -> Dequantize pair: dequantizes the constant payload once, here,
// and stores the float32 result directly in the ConstantLayer, so no per-inference
// dequantization is needed. The Graph parameter is unused; constantLayer and
// dequantizeLayer are the matched pair passed down from Run().
39  static void ReplaceConstDequantisationLayer(Graph&,
40  ConstantLayer* constantLayer,
41  DequantizeLayer* dequantizeLayer)
42  {
43  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
44  /**
45  * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
46  * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
47  * want to Dequantize them once and store them in a Const layer to be used everytime as they will not change.
48  */
// Tensor infos for: the constant's output, the dequantize layer's input (same edge,
// carries the quantization scale/offset used below), and the dequantize output
// (defines the element count and target float type).
49  TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
50  TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
51  TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
52 
53  bool requiresPermute = false;
54 
// Only the first consumer of the dequantize output is inspected here; NOTE(review):
// other consumers (if any) would not trigger the permute path — TODO confirm intended.
55  auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
56  if (connection)
57  {
58  if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
59  {
60  /**
61  * ArmNN does not currently support non-fixed weights or bias
62  * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in]
63  * but ArmNN expects the filter's height and width indices to match the input's height
64  * and width indices so we permute it to OIHW if the DataLayout is NCHW
65  */
66  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
67  "Convolution layer.";
68  auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
69  if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
70  {
71  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
72  "Convolution layer and requires permute on weights. ";
73  requiresPermute = true;
74  }
75  }
76  }
77 
78  ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
// Connection count is captured BEFORE MoveAllConnections() below; the fix-up loop at
// the end only visits the connections that were moved over from the dequantize layer.
79  auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
80 
81  ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
82  << "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
83  << "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType())
84 
// Destination buffer for the dequantized values; value-initialized to 0.0f.
85  std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
86  if (constantInfo.GetDataType() == DataType::Float16 &&
87  inputDequantizeInfo.GetDataType() == DataType::Float16 &&
88  outputDequantizeInfo.GetDataType() == DataType::Float32)
89  {
90  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
// NOTE(review): the opening line of the FP16->FP32 call (original line 91, presumably
// `ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),` — see the
// ConvertFloat16To32 declaration in the cross-references) is missing from this
// extraction; the two argument lines below belong to that call.
92  outputDequantizeInfo.GetNumElements(),
93  newValues.data());
94  }
95  else if (((constantInfo.GetDataType() == DataType::QAsymmS8
96  && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
97  || (constantInfo.GetDataType() == DataType::QSymmS8
98  && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
99  outputDequantizeInfo.GetDataType() == DataType::Float32)
100  {
101  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
// Map(true) provides a read pointer to the constant payload; scale/offset come from
// the dequantize layer's input edge.
102  ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
103  outputDequantizeInfo.GetNumElements(),
104  inputDequantizeInfo.GetQuantizationScale(),
105  inputDequantizeInfo.GetQuantizationOffset(),
106  newValues.data());
107  }
108 
// NOTE(review): if neither branch above matched, newValues is still the zero-filled
// vector from line 85, so the constant payload would be replaced by zeros — confirm
// the matcher guarantees one of the two type combinations.
109  TensorInfo newInfo = outputDequantizeInfo;
110  newInfo.SetConstant(true);
111  if (requiresPermute)
112  {
113  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
// OHWI -> OIHW: output channel stays, input channel moves ahead of spatial dims.
114  const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
115  std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
116  armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
117  newValues.data(), permutedValues.data(),
118  GetDataTypeSize(outputDequantizeInfo.GetDataType()));
119  ConstTensor newInput(newInfo, permutedValues);
120  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
121  }
122  else
123  {
// No layout change needed: store the dequantized values as-is.
124  ConstTensor newInput(newInfo, newValues);
125  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
126  }
127 
128  // Moves connections in dequantize output to the constant layer.
129  // Dequantize layer will be removed if left unconnected.
130  dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
131 
132  // Updating the output tensor
133  constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
134  ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
135 
136  // Set isConstant to true in all input tensor infos where constantLayer is now connected to
137  for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
138  {
139  auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
// NOTE(review): original line 140 is missing from this extraction — presumably the
// `.GetConnectedOutputSlot()->GetTensorInfo();` continuation of the chain above.
141  info.SetConstant();
142  constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
// NOTE(review): original line 143 is missing — presumably the matching
// `.GetConnectedOutputSlot()->SetTensorInfo(info);` continuation. Confirm upstream.
144  }
145  }
146 
147 
148 static void ConvertInt8To32(const void* srcInt8Buffer,
149  size_t numElements,
150  const float scale,
151  const int32_t offset,
152  float* dstFloat32Buffer)
153 {
154  ARMNN_ASSERT(srcInt8Buffer != nullptr);
155  ARMNN_ASSERT(dstFloat32Buffer != nullptr);
156 
157  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
158  ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;
159 
160  const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
161 
162  for (size_t i = 0; i < numElements; ++i)
163  {
164  dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
165  }
166 }
167 
168 };
169 
174 
175 } // namespace optimizations
176 } // namespace armnn
A layer that the constant data can be bound to.
bool IsConstant() const
Definition: Tensor.cpp:509
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Layer & GetOwningLayer() const
Definition: Layer.hpp:119
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
Copyright (c) 2021 ARM Limited and Contributors.
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:322
constexpr const char * GetDataTypeName(DataType dataType)
Definition: TypesUtils.hpp:202
unsigned int GetNumConnections() const override
Definition: Layer.hpp:145
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:324
This layer dequantizes the input tensor.
int32_t GetQuantizationOffset() const
Definition: Tensor.cpp:478
float GetQuantizationScale() const
Definition: Tensor.cpp:461
DataType GetDataType() const
Definition: Tensor.hpp:198
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:273
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Layer & GetOwningLayer() const
Definition: Layer.hpp:53
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition: Layer.cpp:87
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:326
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92
void MoveAllConnections(OutputSlot &destination)
Moves all connections to another OutputSlot.
Definition: Layer.cpp:145
const InputSlot * GetConnection(unsigned int index) const override
Definition: Layer.cpp:75
unsigned int GetNumElements() const
Definition: Tensor.hpp:196
constexpr unsigned int GetDataTypeSize(DataType dataType)
Definition: TypesUtils.hpp:151