plain/22.05.01/_convert_const_dequantisation_layers_to_const_layers_8hpp_source.xhtml

 //
 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once

 #include "Optimization.hpp"
 #include "NetworkUtils.hpp"

 namespace armnn
 {
 namespace optimizations
 {

 class ConvertConstDequantisationLayersToConstLayersImpl
 {
 public:
     void Run(Graph& graph, InputSlot& connection) const
     {
         Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
         Layer& child = connection.GetOwningLayer();

         ARMNN_ASSERT(base.GetType() == LayerType::Constant);
         ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

         ReplaceConstDequantisationLayer(graph,
                                         PolymorphicDowncast<ConstantLayer*>(&base),
                                         PolymorphicDowncast<DequantizeLayer*>(&child));

     }
 protected:
     ConvertConstDequantisationLayersToConstLayersImpl() = default;
     ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
 private:

     static void ReplaceConstDequantisationLayer(Graph& graph,
                                                 ConstantLayer* constantLayer,
                                                 DequantizeLayer* dequantizeLayer)
     {
         IgnoreUnused(graph);
         /**
          * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
          * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
          * want to Dequantize them once and store them in a Const layer to be used everytime as they will not change.
          */
         TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
         TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
         TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

         ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
         auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

         std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
         if (constantInfo.GetDataType() == DataType::Float16 &&
             inputDequantizeInfo.GetDataType() == DataType::Float16 &&
             outputDequantizeInfo.GetDataType() == DataType::Float32)
         {
             armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                    outputDequantizeInfo.GetNumElements(),
                                                                    newValues.data());
         }
         else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
                 inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
                 outputDequantizeInfo.GetDataType() == DataType::Float32)
         {
             ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                             outputDequantizeInfo.GetNumElements(),
                             newValues.data());
         }

         TensorInfo newInfo = outputDequantizeInfo;
         newInfo.SetConstant(true);
         ConstTensor newInput(newInfo, newValues);
         constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));

         // Moves connections in dequantize output to the constant layer.
         // Dequantize layer will be removed if left unconnected.
         dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

         // Updating the output tensor
         constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
         ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

         // Set isConstant to true in all input tensor infos where constantLayer is now connected to
         for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
         {
             auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                     .GetConnectedOutputSlot()->GetTensorInfo();
             info.SetConstant();
             constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                     .GetConnectedOutputSlot()->SetTensorInfo(info);
         }
     }


 static void ConvertInt8To32(const void* srcInt8Buffer,
                             size_t numElements,
                             float* dstFloat32Buffer)
 {
     ARMNN_ASSERT(srcInt8Buffer != nullptr);
     ARMNN_ASSERT(dstFloat32Buffer != nullptr);

     const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

     for (size_t i = 0; i < numElements; ++i)
     {
         dstFloat32Buffer[i] = pInt8[i];
     }
 }

 };

 /// Optimisation type that instantiates OptimizeForConnection with ConstantLayer and DequantizeLayer as the
 /// two layer types and ConvertConstDequantisationLayersToConstLayersImpl as the implementation to run.
 using ConvertConstDequantisationLayersToConstLayers =
     OptimizeForConnection<ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl>;

 } // namespace optimizations
 } // namespace armnn
armnn::ConstantLayer
A layer that the constant data can be bound to.
Definition: ConstantLayer.hpp:15

armnn::TensorInfo::IsConstant
bool IsConstant() const
Definition: Tensor.cpp:509

armnn::TensorInfo
Definition: Tensor.hpp:152

armnn::ConstantLayer::m_LayerOutput
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Definition: ConstantLayer.hpp:48

armnn::OutputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:118

armnn::optimizations::ConvertConstDequantisationLayersToConstLayersImpl::Run
void Run(Graph &graph, InputSlot &connection) const
Definition: ConvertConstDequantisationLayersToConstLayers.hpp:18

armnn::DataType::QAsymmS8

armnn::InputSlot
Definition: Layer.hpp:42

armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6

armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14

armnn::Layer::GetNumOutputSlots
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:320

armnn::OutputSlot::GetNumConnections
unsigned int GetNumConnections() const override
Definition: Layer.hpp:143

armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:322

armnn::DequantizeLayer
This layer dequantizes the input tensor.
Definition: DequantizeLayer.hpp:13

armnn::TensorInfo::GetDataType
DataType GetDataType() const
Definition: Tensor.hpp:198

armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327

armnn::ScopedTensorHandle
Definition: TensorHandle.hpp:115

armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:271

NetworkUtils.hpp

armnn::DataType::Float16

ARMNN_ASSERT
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56

armnnUtils::FloatingPointConverter::ConvertFloat16To32
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Definition: FloatingPointConverter.cpp:31

armnn::optimizations::ConvertConstDequantisationLayersToConstLayersImpl::ConvertConstDequantisationLayersToConstLayersImpl
ConvertConstDequantisationLayersToConstLayersImpl()=default

armnn::LayerType::Dequantize

armnn::InputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:53

armnn::OptimizeForConnection
Definition: Optimization.hpp:118

armnn::Graph
Definition: Graph.hpp:30

armnn::BoostLogSeverityMapping::info

armnn::optimizations::ConvertConstDequantisationLayersToConstLayersImpl
Definition: ConvertConstDequantisationLayersToConstLayers.hpp:15

armnn::OutputSlot::SetTensorInfo
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition: Layer.cpp:87

armnn::TensorInfo::SetConstant
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514

armnn::Layer::GetOutputSlot
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:324

armnn::DataType::Float32

Optimization.hpp

armnn::optimizations::ConvertConstDequantisationLayersToConstLayersImpl::~ConvertConstDequantisationLayersToConstLayersImpl
~ConvertConstDequantisationLayersToConstLayersImpl()=default

armnn::PaddingMode::Constant

armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92

armnn::OutputSlot::MoveAllConnections
void MoveAllConnections(OutputSlot &destination)
Moves all connections to another OutputSlot.
Definition: Layer.cpp:145

armnn::Layer
Definition: Layer.hpp:215

armnn::OutputSlot::GetConnection
const InputSlot * GetConnection(unsigned int index) const override
Definition: Layer.cpp:75

armnn::TensorInfo::GetNumElements
unsigned int GetNumElements() const
Definition: Tensor.hpp:196