17 : m_Ranges(rangeTracker)
18 , m_QuantizedNetwork(
INetwork::Create())
19 , m_QuantizationScheme(quantizationScheme)
20 , m_PreserveType(preserveType)
24 void QuantizerVisitor::SetQuantizedInputConnections(
const IConnectableLayer* srcLayer,
27 BOOST_ASSERT(srcLayer);
31 const InputSlot* inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
32 BOOST_ASSERT(inputSlot);
35 BOOST_ASSERT(outputSlot);
39 auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.
GetGuid());
40 if (found == m_OriginalToQuantizedGuidMap.end())
43 BOOST_ASSERT_MSG(
false,
"Error in graph traversal");
51 newOutputSlot.
Connect(newInputSlot);
60 info.SetQuantizationOffset(qParams.second);
61 info.SetQuantizationScale(qParams.first);
69 std::vector<int32_t>& backing)
71 BOOST_ASSERT(srcLayer);
73 auto inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
74 BOOST_ASSERT(inputSlot);
75 const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
77 BOOST_ASSERT(outputSlot);
81 auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.
GetGuid());
82 if (found == m_OriginalToQuantizedGuidMap.end())
85 BOOST_ASSERT_MSG(
false,
"Error in graph traversal");
86 return biases.
value();
98 backing.resize(biases.
value().GetInfo().GetNumElements());
101 for (
size_t i = 0; i < backing.size(); ++i)
103 float fp32Value =
static_cast<const float*
>(biases.
value().GetMemoryArea())[i];
104 backing[i] = boost::numeric_cast<int32_t>(fp32Value * ( 1 / scale ));
112 m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->
GetGuid(), quantizedLayer->
GetGuid()));
113 m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->
GetGuid(), quantizedLayer));
125 IConnectableLayer* newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name);
126 RecordLayer(layer, newLayer);
127 SetQuantizedInputConnections(layer, newLayer);
133 RecordLayer(layer, newLayer);
134 SetQuantizedInputConnections(layer, newLayer);
141 IConnectableLayer* newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name);
142 RecordLayer(layer, newLayer);
143 SetQuantizedInputConnections(layer, newLayer);
154 std::vector<uint8_t> meanBacking;
157 std::vector<uint8_t> varianceBacking;
160 std::vector<uint8_t> betaBacking;
163 std::vector<uint8_t> gammaBacking;
166 IConnectableLayer* newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(desc,
173 RecordLayer(layer, newLayer);
174 SetQuantizedInputConnections(layer, newLayer);
181 IConnectableLayer* newLayer = m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
182 RecordLayer(layer, newLayer);
183 SetQuantizedInputConnections(layer, newLayer);
190 IConnectableLayer* newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name);
191 RecordLayer(layer, newLayer);
192 SetQuantizedInputConnections(layer, newLayer);
199 IConnectableLayer* newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name);
200 RecordLayer(layer, newLayer);
201 SetQuantizedInputConnections(layer, newLayer);
208 std::vector<uint8_t> inputBacking;
211 IConnectableLayer* newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name);
212 RecordLayer(layer, newLayer);
221 std::vector<uint8_t> weightsBacking;
224 std::vector<int32_t> biasesBacking;
228 ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
232 IConnectableLayer* newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor,
237 RecordLayer(layer, newLayer);
238 SetQuantizedInputConnections(layer, newLayer);
245 IConnectableLayer* newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(descriptor, name);
246 RecordLayer(layer, newLayer);
247 SetQuantizedInputConnections(layer, newLayer);
256 std::vector<uint8_t> weightsBacking;
259 std::vector<int32_t> biasesBacking;
263 ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
267 IConnectableLayer* newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer(desc,
272 RecordLayer(layer, newLayer);
273 SetQuantizedInputConnections(layer, newLayer);
280 IConnectableLayer* newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
281 RecordLayer(layer, newLayer);
282 SetQuantizedInputConnections(layer, newLayer);
291 std::vector<uint8_t> weightsBacking;
294 std::vector<int32_t> biasesBacking;
298 ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
307 RecordLayer(layer, newLayer);
308 SetQuantizedInputConnections(layer, newLayer);
321 RecordLayer(layer, quantizeLayer);
325 RecordLayer(layer, inputLayer);
333 IConnectableLayer* newLayer = m_QuantizedNetwork->AddInstanceNormalizationLayer(descriptor, name);
334 RecordLayer(layer, newLayer);
335 SetQuantizedInputConnections(layer, newLayer);
342 IConnectableLayer* newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
343 RecordLayer(layer, newLayer);
344 SetQuantizedInputConnections(layer, newLayer);
351 IConnectableLayer* newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name);
352 RecordLayer(layer, newLayer);
353 SetQuantizedInputConnections(layer, newLayer);
360 RecordLayer(layer, newLayer);
361 SetQuantizedInputConnections(layer, newLayer);
368 IConnectableLayer* newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name);
369 RecordLayer(layer, newLayer);
370 SetQuantizedInputConnections(layer, newLayer);
382 RecordLayer(layer, dequantizeLayer);
383 SetQuantizedInputConnections(layer, dequantizeLayer);
389 RecordLayer(layer, outputLayer);
390 SetQuantizedInputConnections(layer, outputLayer);
398 IConnectableLayer* newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name);
399 RecordLayer(layer, newLayer);
400 SetQuantizedInputConnections(layer, newLayer);
407 IConnectableLayer* newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name);
408 RecordLayer(layer, newLayer);
409 SetQuantizedInputConnections(layer, newLayer);
416 IConnectableLayer* newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name);
417 RecordLayer(layer, newLayer);
418 SetQuantizedInputConnections(layer, newLayer);
425 RecordLayer(layer, newLayer);
426 SetQuantizedInputConnections(layer, newLayer);
433 IConnectableLayer* newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name);
434 RecordLayer(layer, newLayer);
435 SetQuantizedInputConnections(layer, newLayer);
455 IConnectableLayer* newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name);
456 RecordLayer(layer, newLayer);
457 SetQuantizedInputConnections(layer, newLayer);
469 IConnectableLayer* newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name);
470 RecordLayer(layer, newLayer);
471 SetQuantizedInputConnections(layer, newLayer);
478 IConnectableLayer* newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name);
479 RecordLayer(layer, newLayer);
480 SetQuantizedInputConnections(layer, newLayer);
487 IConnectableLayer* newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
488 RecordLayer(layer, newLayer);
489 SetQuantizedInputConnections(layer, newLayer);
496 IConnectableLayer* newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
497 RecordLayer(layer, newLayer);
498 SetQuantizedInputConnections(layer, newLayer);
505 IConnectableLayer* newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name);
506 RecordLayer(layer, newLayer);
507 SetQuantizedInputConnections(layer, newLayer);
514 IConnectableLayer* newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name);
515 RecordLayer(layer, newLayer);
516 SetQuantizedInputConnections(layer, newLayer);
523 IConnectableLayer* newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name);
524 RecordLayer(layer, newLayer);
525 SetQuantizedInputConnections(layer, newLayer);
532 RecordLayer(layer, newLayer);
533 SetQuantizedInputConnections(layer, newLayer);
543 std::vector<uint8_t> weightsBacking;
547 std::vector<int32_t> biasesBacking;
551 ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
555 IConnectableLayer* newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(descriptor,
560 RecordLayer(layer, newLayer);
561 SetQuantizedInputConnections(layer, newLayer);
void VisitPreluLayer(const IConnectableLayer *layer, const char *name=nullptr) override
virtual LayerGuid GetGuid() const =0
LayerGuid GetGuid() const final
void VisitStridedSliceLayer(const IConnectableLayer *layer, const StridedSliceDescriptor &stridedSliceDescriptor, const char *name=nullptr) override
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
void VisitDepthwiseConvolution2dLayer(const IConnectableLayer *layer, const DepthwiseConvolution2dDescriptor &desc, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer.
A NormalizationDescriptor for the NormalizationLayer.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
virtual const TensorInfo & GetTensorInfo() const =0
void VisitSliceLayer(const IConnectableLayer *layer, const SliceDescriptor &sliceDescriptor, const char *name=nullptr) override
virtual OffsetScalePair ComputeScheme(double min, double max) const =0
uint32_t m_TargetHeight
Target height value.
void VisitElementwiseUnaryLayer(const IConnectableLayer *layer, const ElementwiseUnaryDescriptor &elementwiseUnaryDescriptor, const char *name=nullptr) override
void VisitReshapeLayer(const IConnectableLayer *layer, const ReshapeDescriptor &reshapeDescriptor, const char *name=nullptr) override
void VisitBatchNormalizationLayer(const IConnectableLayer *layer, const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr) override
QuantizerVisitor(const RangeTracker &rangeTracker, const IQuantizationScheme *quantizationScheme, bool preserveType=false)
ConstTensor CreateQuantizedConst(const ConstTensor &tensor, std::vector< uint8_t > &backing)
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
A PadDescriptor for the PadLayer.
An ActivationDescriptor for the ActivationLayer.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
void VisitAdditionLayer(const IConnectableLayer *layer, const char *name=nullptr) override
uint32_t m_TargetWidth
Target width value.
void VisitConstantLayer(const IConnectableLayer *layer, const ConstTensor &input, const char *name=nullptr) override
void VisitNormalizationLayer(const IConnectableLayer *layer, const NormalizationDescriptor &normalizationDescriptor, const char *name=nullptr) override
A ViewsDescriptor for the SplitterLayer. Descriptor to configure the splitting process. Number of Views must be equal to the number of outputs, and their order must match - e.g. first view corresponds to the first output, second view to the second output, etc.
void VisitLogSoftmaxLayer(const IConnectableLayer *layer, const LogSoftmaxDescriptor &logSoftmaxDescriptor, const char *name=nullptr) override
void VisitFullyConnectedLayer(const IConnectableLayer *layer, const FullyConnectedDescriptor &desc, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
A SpaceToDepthDescriptor for the SpaceToDepthLayer.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A ReshapeDescriptor for the ReshapeLayer.
virtual DataType GetDataType() const =0
void VisitConcatLayer(const IConnectableLayer *layer, const OriginsDescriptor &originsDescriptor, const char *name=nullptr) override
A TransposeConvolution2dDescriptor for the TransposeConvolution2dLayer.
An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
void VisitConvolution2dLayer(const IConnectableLayer *layer, const Convolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
void VisitPermuteLayer(const IConnectableLayer *layer, const PermuteDescriptor &permuteDescriptor, const char *name=nullptr) override
void VisitPooling2dLayer(const IConnectableLayer *layer, const Pooling2dDescriptor &pooling2dDescriptor, const char *name=nullptr) override
void VisitSubtractionLayer(const IConnectableLayer *layer, const char *name=nullptr) override
void VisitSpaceToDepthLayer(const IConnectableLayer *layer, const SpaceToDepthDescriptor &spaceToDepthDescriptor, const char *name=nullptr) override
unsigned int CalculateIndexOnOwner() const override
An InstanceNormalizationDescriptor for InstanceNormalizationLayer.
A FullyConnectedDescriptor for the FullyConnectedLayer.
void VisitResizeBilinearLayer(const IConnectableLayer *layer, const ResizeBilinearDescriptor &resizeDesc, const char *name=nullptr) override
void VisitTransposeConvolution2dLayer(const IConnectableLayer *layer, const TransposeConvolution2dDescriptor &descriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
const TensorInfo & GetInfo() const
A StackDescriptor for the StackLayer.
void VisitSoftmaxLayer(const IConnectableLayer *layer, const SoftmaxDescriptor &softmaxDescriptor, const char *name=nullptr) override
void VisitPadLayer(const IConnectableLayer *, const PadDescriptor &, const char *name=nullptr) override
A ResizeBilinearDescriptor for the ResizeBilinearLayer.
A SoftmaxDescriptor for the SoftmaxLayer.
An output connection slot for a layer. The output slot may be connected to 1 or more input slots of subsequent layers in the graph.
void VisitMultiplicationLayer(const IConnectableLayer *layer, const char *name=nullptr) override
uint32_t m_TargetWidth
Target width value.
void VisitStackLayer(const IConnectableLayer *layer, const StackDescriptor &stackDescriptor, const char *name=nullptr) override
void VisitComparisonLayer(const IConnectableLayer *layer, const ComparisonDescriptor &comparisonDescriptor, const char *name=nullptr) override
void VisitActivationLayer(const IConnectableLayer *layer, const ActivationDescriptor &activationDescriptor, const char *name=nullptr) override
void VisitMeanLayer(const IConnectableLayer *layer, const MeanDescriptor &meanDescriptor, const char *name=nullptr) override
void VisitInputLayer(const IConnectableLayer *layer, LayerBindingId id, const char *name=nullptr) override
MinMaxRange GetRange(LayerGuid guid, unsigned int idx) const
Retrieve the Range for a particular output slot on a particular layer.
void VisitSplitterLayer(const IConnectableLayer *layer, const SplitterDescriptor &splitterDescriptor, const char *name=nullptr) override
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
DataType GetDataType() const
A Pooling2dDescriptor for the Pooling2dLayer.
void VisitBatchToSpaceNdLayer(const IConnectableLayer *layer, const BatchToSpaceNdDescriptor &batchToSpaceNdDescriptor, const char *name=nullptr) override
virtual unsigned int GetNumInputSlots() const =0
A SliceDescriptor for the SliceLayer.
void VisitInstanceNormalizationLayer(const IConnectableLayer *layer, const InstanceNormalizationDescriptor &instanceNormalizationDescriptor, const char *name=nullptr) override
void VisitAbsLayer(const IConnectableLayer *layer, const char *name=nullptr) override
Functions to quantize the individual layers, overridden from ILayerVisitor.
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
std::pair< float, int > OffsetScalePair
void VisitOutputLayer(const IConnectableLayer *layer, LayerBindingId id, const char *name=nullptr) override
A PermuteDescriptor for the PermuteLayer.
A Convolution2dDescriptor for the Convolution2dLayer.
uint32_t m_TargetHeight
Target height value.
A MeanDescriptor for the MeanLayer.
virtual int Connect(IInputSlot &destination)=0
A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer.
void VisitResizeLayer(const IConnectableLayer *layer, const ResizeDescriptor &resizeDescriptor, const char *name=nullptr) override
const TensorInfo & GetTensorInfo() const override
An ArgMinMaxDescriptor for ArgMinMaxLayer.
bool has_value() const noexcept
Layer & GetOwningLayer() const
void VisitDepthToSpaceLayer(const IConnectableLayer *layer, const DepthToSpaceDescriptor &depthToSpaceDescriptor, const char *name=nullptr) override
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
void VisitArgMinMaxLayer(const IConnectableLayer *layer, const ArgMinMaxDescriptor &argMinMaxDescriptor, const char *name=nullptr) override
A ResizeDescriptor for the ResizeLayer.
A ComparisonDescriptor for the ComparisonLayer.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
void VisitRsqrtLayer(const IConnectableLayer *, const char *name=nullptr) override
float GetQuantizationScale() const
void VisitSpaceToBatchNdLayer(const IConnectableLayer *layer, const SpaceToBatchNdDescriptor &spaceToBatchNdDescriptor, const char *name=nullptr) override
An OriginsDescriptor for the ConcatLayer. Descriptor to configure the concatenation process...
A StridedSliceDescriptor for the StridedSliceLayer.