From 1e22d965034d8ca7964bd2be095adef67ca287e3 Mon Sep 17 00:00:00 2001
From: Sadik Armagan
Date: Wed, 3 Aug 2022 11:27:05 +0100
Subject: IVGCVSW-6954 'Arm NN SL Improvements'

* Move the Conv2D and DepthwiseConv2D validation to Optimization level
  when the weights and bias tensors are provided as constant inputs
* Take the offset and scale values into account when doing INT8 to FP32
  dequantization

Signed-off-by: Sadik Armagan
Change-Id: I1f81f15640395ac041923b10dbe9151159715117
---
 shim/sl/canonical/ConversionUtils.cpp              | 46 +++++++++----
 shim/sl/canonical/ConversionUtils.hpp              |  4 ++
 shim/sl/canonical/Converter.cpp                    | 77 +++++++++++++++-------
 src/armnn/Network.cpp                              | 20 +++---
 ...nvertConstDequantisationLayersToConstLayers.hpp | 71 ++++++++++++++++++--
 5 files changed, 168 insertions(+), 50 deletions(-)

diff --git a/shim/sl/canonical/ConversionUtils.cpp b/shim/sl/canonical/ConversionUtils.cpp
index 96a8ddca6a..f48af32e21 100644
--- a/shim/sl/canonical/ConversionUtils.cpp
+++ b/shim/sl/canonical/ConversionUtils.cpp
@@ -67,6 +67,11 @@ void LayerInputHandle::SanitizeQuantizationScale(LayerInputHandle& weight, Layer
     }
 }
 
+armnn::IOutputSlot* LayerInputHandle::GetOutputSlot() const
+{
+    return m_OutputSlot;
+}
+
 ConstTensorPin::ConstTensorPin(bool optional)
     : m_Optional(optional)
 {}
@@ -276,17 +281,6 @@ LayerInputHandle ConvertToLayerInputHandle(const Operation& operation,
         case OperandLifeTime::CONSTANT_REFERENCE:
         {
             auto constantTensorDataType = operandTensorInfo.GetDataType();
-            if (inputHandle)
-            {
-                if ((inputHandle->GetTensorInfo().GetDataType() == armnn::DataType::Float32
-                     || inputHandle->GetTensorInfo().GetDataType() == armnn::DataType::Float16)
-                    && (operandTensorInfo.GetDataType() == armnn::DataType::QAsymmU8
-                        || operandTensorInfo.GetDataType() == armnn::DataType::QAsymmS8))
-                {
-                    constantTensorDataType = inputHandle->GetTensorInfo().GetDataType();
-                }
-            }
-
             // The tensor has an already known constant value, and can be converted into an ArmNN Constant layer.
             ConstTensorPin tensorPin = ConvertOperandToConstTensorPin(*operand,
                                                                       model,
@@ -1029,4 +1023,34 @@ bool SetupAndTrackLayerOutputSlot(const Operation& operation,
     return true;
 }
 
+bool IsConnectedToDequantize(armnn::IOutputSlot* ioutputSlot)
+{
+    VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize()";
+    if (!ioutputSlot)
+    {
+        return false;
+    }
+    VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() ioutputSlot is valid.";
+    // Find the connected layers.
+    armnn::IConnectableLayer& owningLayer = ioutputSlot->GetOwningIConnectableLayer();
+    if (owningLayer.GetType() == armnn::LayerType::Dequantize)
+    {
+        VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() connected to Dequantize Layer.";
+        armnn::IInputSlot& inputSlot = owningLayer.GetInputSlot(0);
+        armnn::IOutputSlot* connection = inputSlot.GetConnection();
+        if (connection)
+        {
+            VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() Dequantize Layer has a connection.";
+            armnn::IConnectableLayer& connectedLayer =
+                connection->GetOwningIConnectableLayer();
+            if (connectedLayer.GetType() == armnn::LayerType::Constant)
+            {
+                VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() Dequantize Layer connected to Constant";
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 } // namespace armnn_driver
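For reference, the new helper returns true only when the given output slot belongs to a Dequantize layer whose input is a Constant layer. A minimal sketch of a graph that matches this pattern, built with the public Arm NN API; layer names, shapes and quantization parameters below are illustrative, not taken from the patch:

#include <armnn/INetwork.hpp>
#include <cstdint>
#include <vector>

// Build Constant (QAsymmS8 weights) -> Dequantize -> Convolution2d, i.e. the shape of
// graph that IsConnectedToDequantize() is looking for on the weights connection.
armnn::INetworkPtr BuildDequantizedWeightsGraph()
{
    using namespace armnn;

    INetworkPtr network = INetwork::Create();

    // Quantized constant weights in OHWI order, e.g. {8, 3, 3, 1}, scale 0.5, offset 0.
    TensorInfo weightsInfo({8, 3, 3, 1}, DataType::QAsymmS8, 0.5f, 0, true);
    std::vector<int8_t> weightsData(weightsInfo.GetNumElements(), 1);
    ConstTensor weights(weightsInfo, weightsData);

    IConnectableLayer* constant   = network->AddConstantLayer(weights, "weights");
    IConnectableLayer* dequantize = network->AddDequantizeLayer("dequantize");

    Convolution2dDescriptor desc;
    desc.m_StrideX     = 1;
    desc.m_StrideY     = 1;
    desc.m_DataLayout  = DataLayout::NHWC;
    desc.m_BiasEnabled = false;
    IConnectableLayer* conv = network->AddConvolution2dLayer(desc, "conv2d");

    constant->GetOutputSlot(0).SetTensorInfo(weightsInfo);
    constant->GetOutputSlot(0).Connect(dequantize->GetInputSlot(0));

    TensorInfo dequantizedInfo({8, 3, 3, 1}, DataType::Float32);
    dequantize->GetOutputSlot(0).SetTensorInfo(dequantizedInfo);
    // Slot 1 of the convolution is its weights input; this is the connection that
    // IsConnectedToDequantize(weightsInput.GetOutputSlot()) would walk back from.
    dequantize->GetOutputSlot(0).Connect(conv->GetInputSlot(1));

    return network;
}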
diff --git a/shim/sl/canonical/ConversionUtils.hpp b/shim/sl/canonical/ConversionUtils.hpp
index 8058bcb379..beee00d11a 100644
--- a/shim/sl/canonical/ConversionUtils.hpp
+++ b/shim/sl/canonical/ConversionUtils.hpp
@@ -79,6 +79,8 @@ public:
 
     void SanitizeQuantizationScale(LayerInputHandle& weight, LayerInputHandle& input);
 
+    armnn::IOutputSlot* GetOutputSlot() const;
+
 private:
     armnn::IOutputSlot* m_OutputSlot;
     bool m_Valid;
@@ -1012,4 +1014,6 @@ ConstTensorPin DequantizeAndMakeConstTensorPin(const Operation& operation,
                                                size_t operandIndex,
                                                bool optional = false);
 
+bool IsConnectedToDequantize(armnn::IOutputSlot* ioutputSlot);
+
 } // namespace armnn_driver
diff --git a/shim/sl/canonical/Converter.cpp b/shim/sl/canonical/Converter.cpp
index fc983dc081..b50b0a9397 100644
--- a/shim/sl/canonical/Converter.cpp
+++ b/shim/sl/canonical/Converter.cpp
@@ -998,9 +998,20 @@ bool Converter::ConvertConv2d(const Operation& operation, const Model& model, Co
     desc.m_BiasEnabled = true;
     Optional<TensorInfo> biases(biasInfo);
 
-    bool isSupported = false;
-    auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+    bool requiresValidation = true;
+    const Operand* weightsOperand = GetInputOperand(operation, 1, model);
+    const Operand* biasOperand = GetInputOperand(operation, 2, model);
+    if (IsConnectedToDequantize(weightsInput.GetOutputSlot())
+        || IsConnectedToDequantize(biasInput.GetOutputSlot()))
     {
+        // Do not require validation for now. The optimization step
+        // [ConvertConstDequantisationLayersToConstLayers] will convert these layers to Constant layers,
+        // and layer support will be validated at the end of the optimization.
+        requiresValidation = false;
+        VLOG(DRIVER) << "Converter::ConvertConv2d(): Weights and Biases are as INPUTS.";
+    }
+
+    auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
     {
         FORWARD_LAYER_SUPPORT_FUNC(__func__,
                                    IsConvolution2dSupported,
                                    data.m_Backends,
                                    isSupported,
                                    inputInfo,
                                    outputInfo,
                                    desc,
                                    weightsInfo,
                                    biases);
     };
 
-    if(!IsDynamicTensor(outputInfo))
+    if (requiresValidation)
     {
-        validateFunc(outputInfo, isSupported);
-    }
-    else
-    {
-        isSupported = AreDynamicTensorsSupported();
-    }
+        VLOG(DRIVER) << "Converter::ConvertConv2d(): Requires Validation!";
+        bool isSupported = false;
+        if (!IsDynamicTensor(outputInfo))
+        {
+            validateFunc(outputInfo, isSupported);
+        }
+        else
+        {
+            isSupported = AreDynamicTensorsSupported();
+        }
 
-    if (!isSupported)
-    {
-        return false;
+        if (!isSupported)
+        {
+            return false;
+        }
     }
 
     armnn::IConnectableLayer* startLayer = data.m_Network->AddConvolution2dLayer(desc);
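Reduced to its essentials, the converter change above gates the backend support query behind a requiresValidation flag. A compact sketch of that control flow; the names here are illustrative stand-ins, not driver API:

#include <functional>

// Validation is skipped when either tensor arrives through a Dequantize layer:
// ConvertConstDequantisationLayersToConstLayers later folds it into a Constant layer,
// and the optimizer re-checks layer support at that point.
bool ConvertConvWithDeferredValidation(bool weightsComeFromDequantize,
                                       bool biasComesFromDequantize,
                                       const std::function<bool()>& validateWithBackends)
{
    bool requiresValidation = !(weightsComeFromDequantize || biasComesFromDequantize);

    if (requiresValidation && !validateWithBackends())
    {
        return false; // not supported right now, reject the operation
    }

    // ... add the Convolution2d layer and connect its input, weights and bias here ...
    return true;
}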
@@ -1231,9 +1247,17 @@ bool Converter::ConvertDepthwiseConv2d(const Operation& operation, const Model&
     desc.m_BiasEnabled = true;
     Optional<TensorInfo> biases(biasInfo);
 
-    bool isSupported = false;
-    auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+    bool requiresValidation = true;
+    if (IsConnectedToDequantize(weightsInput.GetOutputSlot()) || IsConnectedToDequantize(biasInput.GetOutputSlot()))
     {
+        // Do not require validation for now. The optimization step
+        // [ConvertConstDequantisationLayersToConstLayers] will convert these layers to Constant layers,
+        // and layer support will be validated at the end of the optimization.
+        requiresValidation = false;
+        VLOG(DRIVER) << "Converter::ConvertDepthwiseConv2d(): Weights and Biases are as INPUTS.";
+    }
+
+    auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
     {
         FORWARD_LAYER_SUPPORT_FUNC(__func__,
                                    IsDepthwiseConvolutionSupported,
                                    data.m_Backends,
                                    isSupported,
                                    inputInfo,
                                    outputInfo,
                                    desc,
                                    weightsInfo,
                                    biases);
     };
 
-    if(!IsDynamicTensor(outputInfo))
+    if (requiresValidation)
     {
-        validateFunc(outputInfo, isSupported);
-    }
-    else
-    {
-        isSupported = AreDynamicTensorsSupported();
-    }
+        VLOG(DRIVER) << "Converter::ConvertDepthwiseConv2d(): Requires Validation!";
+        bool isSupported = false;
+        if (!IsDynamicTensor(outputInfo))
+        {
+            validateFunc(outputInfo, isSupported);
+        }
+        else
+        {
+            isSupported = AreDynamicTensorsSupported();
+        }
 
-    if (!isSupported)
-    {
-        return false;
+        if (!isSupported)
+        {
+            return false;
+        }
     }
 
     armnn::IConnectableLayer* startLayer = data.m_Network->AddDepthwiseConvolution2dLayer(desc);
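Where the deferred check eventually happens: once ConvertConstDequantisationLayersToConstLayers has replaced the Dequantize with a Constant, armnn::Optimize() runs the backend IsLayerSupported queries on the rewritten graph. A hedged sketch of that driver-side step; the backend choice and error handling are illustrative:

#include <armnn/ArmNN.hpp>
#include <iostream>
#include <string>
#include <vector>

// Optimizing the network is the point at which layers that skipped validation in the
// converter are finally checked against the selected backends.
bool OptimizeForBackend(const armnn::INetwork& network, armnn::IRuntime& runtime)
{
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
    std::vector<std::string> messages;

    try
    {
        armnn::IOptimizedNetworkPtr optimized =
            armnn::Optimize(network,
                            backends,
                            runtime.GetDeviceSpec(),
                            armnn::OptimizerOptions(),
                            armnn::Optional<std::vector<std::string>&>(messages));
        return optimized != nullptr;
    }
    catch (const armnn::Exception& e)
    {
        // Unsupported-layer diagnostics surface here and in 'messages'.
        std::cerr << e.what() << std::endl;
        for (const auto& m : messages) { std::cerr << m << std::endl; }
        return false;
    }
}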
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 22fc0a3ed4..c4869fae04 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -2058,16 +2058,18 @@ IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescrip
     auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);
     // Add a constant layer for weights
     ConstantLayer* weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights");
-    weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);
-    layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
+    auto weightsTensorHandle = std::make_shared<ScopedTensorHandle>(weights);
+    weightsLayer->m_LayerOutput = weightsTensorHandle;
+    layer->m_Weight = weightsTensorHandle;
     weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());
     weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
     // Add a constant layer for biases
     if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled)
     {
         ConstantLayer* biasLayer = m_Graph->AddLayer<ConstantLayer>("Bias");
-        biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value());
-        layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
+        auto biasTensorHandle = std::make_shared<ScopedTensorHandle>(biases.value());
+        biasLayer->m_LayerOutput = biasTensorHandle;
+        layer->m_Bias = biasTensorHandle;
         biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());
         biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
     }
@@ -2113,8 +2115,9 @@ IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer(
 
     // Add a constant layer for weights
     ConstantLayer* weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights");
-    weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);
-    layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
+    auto weightsTensorHandle = std::make_shared<ScopedTensorHandle>(weights);
+    weightsLayer->m_LayerOutput = weightsTensorHandle;
+    layer->m_Weight = weightsTensorHandle;
 
     weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());
     weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
@@ -2123,8 +2126,9 @@
     if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled)
     {
         ConstantLayer* biasLayer = m_Graph->AddLayer<ConstantLayer>("Bias");
-        biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value());
-        layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
+        auto biasTensorHandle = std::make_shared<ScopedTensorHandle>(biases.value());
+        biasLayer->m_LayerOutput = biasTensorHandle;
+        layer->m_Bias = biasTensorHandle;
 
         biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());
         biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
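The overload being modified here takes the weights and bias as ConstTensors and, after this change, shares one ScopedTensorHandle between the generated "Weights"/"Bias" ConstantLayers and the convolution layer's cached members instead of copying the data twice. A sketch of calling that overload from the public API; descriptor values, shapes and data are illustrative:

#include <armnn/INetwork.hpp>
#include <vector>

// Internally this ends up in NetworkImpl::AddConvolution2dLayer above: a ConstantLayer
// feeds convolution input slot 1 (weights) and another feeds slot 2 (bias).
armnn::IConnectableLayer* AddConvWithConstants(armnn::INetwork& network)
{
    using namespace armnn;

    Convolution2dDescriptor desc;
    desc.m_StrideX     = 1;
    desc.m_StrideY     = 1;
    desc.m_DataLayout  = DataLayout::NHWC;
    desc.m_BiasEnabled = true;

    // OHWI weights: 4 filters of 3x3 over 3 input channels.
    TensorInfo weightsInfo({4, 3, 3, 3}, DataType::Float32, 0.0f, 0, true);
    std::vector<float> weightsData(weightsInfo.GetNumElements(), 0.1f);
    ConstTensor weights(weightsInfo, weightsData);

    TensorInfo biasInfo({4}, DataType::Float32, 0.0f, 0, true);
    std::vector<float> biasData(biasInfo.GetNumElements(), 0.0f);
    ConstTensor bias(biasInfo, biasData);

    return network.AddConvolution2dLayer(desc, weights, Optional<ConstTensor>(bias), "conv2d");
}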
diff --git a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
index 16314dc0d0..27acf78d3e 100644
--- a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
+++ b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
@@ -7,6 +7,9 @@
 #include "Optimization.hpp"
 #include "NetworkUtils.hpp"
 
+#include <armnn/Logging.hpp>
+#include <armnnUtils/Permute.hpp>
+
 namespace armnn
 {
 namespace optimizations
@@ -33,11 +36,11 @@ protected:
     ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
 
 private:
-    static void ReplaceConstDequantisationLayer(Graph& graph,
+    static void ReplaceConstDequantisationLayer(Graph&,
                                                 ConstantLayer* constantLayer,
                                                 DequantizeLayer* dequantizeLayer)
     {
-        IgnoreUnused(graph);
+        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
         /**
          * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
          * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
@@ -47,31 +50,80 @@ private:
         TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
         TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
 
+        bool requiresPermute = false;
+
+        auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
+        if (connection)
+        {
+            if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
+            {
+                /**
+                 * ArmNN does not currently support non-fixed weights or bias.
+                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in],
+                 * but ArmNN expects the filter's height and width indices to match the input's height
+                 * and width indices, so we permute it to OIHW if the DataLayout is NCHW.
+                 */
+                ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
+                                   "Convolution layer.";
+                auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
+                if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
+                {
+                    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
+                                       "Convolution layer and requires permute on weights.";
+                    requiresPermute = true;
+                }
+            }
+        }
+
         ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
         auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
 
+        ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
+                        << "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
+                        << "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());
+
         std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
         if (constantInfo.GetDataType() == DataType::Float16 &&
             inputDequantizeInfo.GetDataType() == DataType::Float16 &&
             outputDequantizeInfo.GetDataType() == DataType::Float32)
         {
+            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
             armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                    outputDequantizeInfo.GetNumElements(),
                                                                    newValues.data());
         }
-        else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
-                 inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
+        else if (((constantInfo.GetDataType() == DataType::QAsymmS8
+                   && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
+                  || (constantInfo.GetDataType() == DataType::QSymmS8
+                      && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
                  outputDequantizeInfo.GetDataType() == DataType::Float32)
         {
+            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
             ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                             outputDequantizeInfo.GetNumElements(),
+                            inputDequantizeInfo.GetQuantizationScale(),
+                            inputDequantizeInfo.GetQuantizationOffset(),
                             newValues.data());
         }
 
         TensorInfo newInfo = outputDequantizeInfo;
         newInfo.SetConstant(true);
-        ConstTensor newInput(newInfo, newValues);
-        constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+        if (requiresPermute)
+        {
+            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
+            const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
+            std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
+            armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
+                                newValues.data(), permutedValues.data(),
+                                GetDataTypeSize(outputDequantizeInfo.GetDataType()));
+            ConstTensor newInput(newInfo, permutedValues);
+            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+        }
+        else
+        {
+            ConstTensor newInput(newInfo, newValues);
+            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+        }
 
         // Moves connections in dequantize output to the constant layer.
         // Dequantize layer will be removed if left unconnected.
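The PermutationVector above maps each source dimension to a destination index, so {0, 2, 3, 1} sends O to 0, H to 2, W to 3 and I to 1, turning OHWI into OIHW. A small self-contained sketch of the same rearrangement (shape and values invented for illustration; armnnUtils::Permuted() is used to compute the destination shape that armnnUtils::Permute() expects as its first argument):

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <armnnUtils/Permute.hpp>
#include <iostream>
#include <vector>

int main()
{
    // A tiny OHWI "filter": 2 output channels, 1x1 spatial, 3 input channels.
    armnn::TensorShape ohwiShape({2, 1, 1, 3});
    std::vector<float> ohwiData = { 0, 1, 2,    // filter 0, input channels 0..2
                                    3, 4, 5 };  // filter 1, input channels 0..2

    // mappings[i] is the destination index of source dimension i:
    // O->0, H->2, W->3, I->1, so the destination layout is OIHW.
    const armnn::PermutationVector OHWIToOIHW = {0, 2, 3, 1};

    armnn::TensorShape oihwShape = armnnUtils::Permuted(ohwiShape, OHWIToOIHW);
    std::vector<float> oihwData(ohwiData.size());

    armnnUtils::Permute(oihwShape, OHWIToOIHW,
                        ohwiData.data(), oihwData.data(), sizeof(float));

    // With 1x1 spatial dimensions the element order happens to stay the same here,
    // but the shape is now {2, 3, 1, 1} (OIHW).
    for (unsigned int i = 0; i < oihwShape.GetNumDimensions(); ++i)
    {
        std::cout << oihwShape[i] << " ";
    }
    std::cout << std::endl;
    return 0;
}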
@@ -95,16 +147,21 @@ private:
 
     static void ConvertInt8To32(const void* srcInt8Buffer,
                                 size_t numElements,
+                                const float scale,
+                                const int32_t offset,
                                 float* dstFloat32Buffer)
     {
         ARMNN_ASSERT(srcInt8Buffer != nullptr);
         ARMNN_ASSERT(dstFloat32Buffer != nullptr);
 
+        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
+        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;
+
         const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
 
         for (size_t i = 0; i < numElements; ++i)
         {
-            dstFloat32Buffer[i] = pInt8[i];
+            dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
         }
     }
-- 
cgit v1.2.1
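The loop body change is the substance of the second commit bullet: INT8 weights are now dequantized with the standard affine formula, real = scale * (quantized - offset), rather than a plain integer-to-float cast. A self-contained sketch of the same arithmetic with invented values:

#include <cassert>
#include <cstdint>
#include <vector>

// Standalone version of the corrected dequantization: real = scale * (int8 - offset).
std::vector<float> DequantizeInt8(const std::vector<int8_t>& quantized, float scale, int32_t offset)
{
    std::vector<float> result(quantized.size());
    for (size_t i = 0; i < quantized.size(); ++i)
    {
        result[i] = static_cast<float>(quantized[i] - offset) * scale;
    }
    return result;
}

int main()
{
    // With scale 0.5 and offset 10, the stored value 12 represents (12 - 10) * 0.5 = 1.0f.
    // The previous code ignored scale and offset and would have produced 12.0f instead.
    std::vector<float> values = DequantizeInt8({12, 10, 8}, 0.5f, 10);
    assert(values[0] == 1.0f && values[1] == 0.0f && values[2] == -1.0f);
    return 0;
}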