// // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include "armnn/LayerVisitorBase.hpp" #include "StaticRangeVisitor.hpp" #include "NetworkQuantizationScheme.hpp" #include #include #include #include namespace armnn { // Forward declaration class StaticRangeVisitor; /// Visitor object for quantizing layers in a network class QuantizerVisitor : public LayerVisitorBase { public: QuantizerVisitor(const RangeTracker& rangeTracker, const IQuantizationScheme* quantizationScheme, bool preserveType = false); ~QuantizerVisitor() = default; /// Functions to quantize the individual layers, overridden from ILayerVisitor void VisitActivationLayer(const IConnectableLayer* layer, const ActivationDescriptor& activationDescriptor, const char* name = nullptr) override; void VisitAdditionLayer(const IConnectableLayer* layer, const char* name = nullptr) override; void VisitBatchNormalizationLayer(const IConnectableLayer* layer, const BatchNormalizationDescriptor& desc, const ConstTensor& mean, const ConstTensor& variance, const ConstTensor& beta, const ConstTensor& gamma, const char* name = nullptr) override; void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, const char* name = nullptr) override; void VisitConcatLayer(const IConnectableLayer* layer, const OriginsDescriptor& originsDescriptor, const char* name = nullptr) override; void VisitConstantLayer(const IConnectableLayer* layer, const ConstTensor& input, const char* name = nullptr) override; void VisitConvolution2dLayer(const IConnectableLayer* layer, const Convolution2dDescriptor& convolution2dDescriptor, const ConstTensor& weights, const Optional& biases, const char* name = nullptr) override; void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, const DepthwiseConvolution2dDescriptor& desc, const ConstTensor& weights, const Optional& biases, const char* name = nullptr) override; void VisitFullyConnectedLayer(const IConnectableLayer *layer, const FullyConnectedDescriptor& desc, const ConstTensor& weights, const Optional& biases, const char *name = nullptr) override; void VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name = nullptr) override; void VisitMeanLayer(const IConnectableLayer* layer, const MeanDescriptor& meanDescriptor, const char* name = nullptr) override; void VisitMultiplicationLayer(const IConnectableLayer* layer, const char* name = nullptr) override; void VisitNormalizationLayer(const armnn::IConnectableLayer* layer, const armnn::NormalizationDescriptor& normalizationDescriptor, const char* name = nullptr) override; void VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name = nullptr) override; void VisitPadLayer(const IConnectableLayer*, const PadDescriptor&, const char* name = nullptr) override; void VisitPermuteLayer(const IConnectableLayer* layer, const PermuteDescriptor& permuteDescriptor, const char* name = nullptr) override; void VisitPooling2dLayer(const IConnectableLayer* layer, const Pooling2dDescriptor& pooling2dDescriptor, const char* name = nullptr) override; void VisitPreluLayer(const IConnectableLayer* layer, const char* name = nullptr) override; void VisitReshapeLayer(const IConnectableLayer* layer, const ReshapeDescriptor& reshapeDescriptor, const char* name = nullptr) override; void VisitResizeBilinearLayer(const IConnectableLayer* layer, const ResizeBilinearDescriptor& resizeDesc, const char* name = nullptr) override; void VisitRsqrtLayer(const IConnectableLayer*, const char* name = nullptr) override; void VisitSoftmaxLayer(const IConnectableLayer* layer, const SoftmaxDescriptor& softmaxDescriptor, const char* name = nullptr) override; void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, const char* name = nullptr) override; void VisitSpaceToDepthLayer(const IConnectableLayer* layer, const SpaceToDepthDescriptor& spaceToDepthDescriptor, const char* name = nullptr) override; void VisitSplitterLayer(const IConnectableLayer* layer, const SplitterDescriptor& splitterDescriptor, const char* name = nullptr) override; void VisitStridedSliceLayer(const IConnectableLayer* layer, const StridedSliceDescriptor& stridedSliceDescriptor, const char* name = nullptr) override; void VisitSubtractionLayer(const IConnectableLayer* layer, const char* name = nullptr) override; /// Extract the quantized network INetworkPtr RetrieveFinalNetwork() { return std::move(m_QuantizedNetwork); } private: /// Connects the layer to preceeding layers and sets the quantization parameters based on recorded ranges void SetQuantizedInputConnections(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer); /// Record the guids so we can easily find the layers later void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer); /// Sets the bias quantization scale based on input and weight scales ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer, const ConstTensor& weights, const Optional& biases, std::vector& weightsBacking); /// Reference to the static range visitor used to retrieve the quantization ranges const RangeTracker& m_Ranges; /// Quantized version of the model we are building up INetworkPtr m_QuantizedNetwork; /// Mapping from input network guids to quantized network guids std::unordered_map m_OriginalToQuantizedGuidMap; /// Mapping from guid to layer in quantized network std::unordered_map m_QuantizedGuidToLayerMap; const IQuantizationScheme* m_QuantizationScheme; const bool m_PreserveType; }; } //namespace armnn