diff options
author | Sadik Armagan <sadik.armagan@arm.com> | 2022-08-03 11:27:05 +0100 |
---|---|---|
committer | Nikhil Raj <nikhil.raj@arm.com> | 2022-08-29 10:12:21 +0100 |
commit | b016157f1eea1acc6a84308521c0b90543161da4 (patch) | |
tree | fe228d1014f4fa9a4f74227d0640719d1d92193c /src/armnn | |
parent | ee480d2d6538b0192d40a00ed696b30e2587430c (diff) | |
download | armnn-b016157f1eea1acc6a84308521c0b90543161da4.tar.gz |
IVGCVSW-6954 'Arm NN SL Improvements'
* Move the Conv2D and DepthwiseConv2D validation to Optimization level
when the weights and tensors are provided as constant inputs
* Take into account offset and scale values when doing INT8 to FP32 dequantization
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I1f81f15640395ac041923b10dbe9151159715117
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/Network.cpp | 20 | ||||
-rw-r--r-- | src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp | 71 |
2 files changed, 76 insertions, 15 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 22fc0a3ed4..c4869fae04 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -2058,16 +2058,18 @@ IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescrip auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name); // Add a constant layer for weights ConstantLayer* weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights"); - weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights); - layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights); + auto weightsTensorHandle = std::make_shared<ScopedTensorHandle>(weights); + weightsLayer->m_LayerOutput = weightsTensorHandle; + layer->m_Weight = weightsTensorHandle; weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo()); weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); // Add a constant layer for biases if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled) { ConstantLayer* biasLayer = m_Graph->AddLayer<ConstantLayer>("Bias"); - biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value()); - layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value()); + auto biasTensorHandle = std::make_shared<ScopedTensorHandle>(biases.value()); + biasLayer->m_LayerOutput = biasTensorHandle; + layer->m_Bias = biasTensorHandle; biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo()); biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); } @@ -2113,8 +2115,9 @@ IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer( // Add a constant layer for weights ConstantLayer* weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights"); - weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights); - layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights); + auto weightsTensorHandle = std::make_shared<ScopedTensorHandle>(weights); + 
weightsLayer->m_LayerOutput = weightsTensorHandle; + layer->m_Weight = weightsTensorHandle; weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo()); weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); @@ -2123,8 +2126,9 @@ IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer( if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled) { ConstantLayer* biasLayer = m_Graph->AddLayer<ConstantLayer>("Bias"); - biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value()); - layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value()); + auto biasTensorHandle = std::make_shared<ScopedTensorHandle>(biases.value()); + biasLayer->m_LayerOutput = biasTensorHandle; + layer->m_Bias = biasTensorHandle; biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo()); biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); diff --git a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp index 16314dc0d0..27acf78d3e 100644 --- a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp +++ b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp @@ -7,6 +7,9 @@ #include "Optimization.hpp" #include "NetworkUtils.hpp" +#include <armnn/Logging.hpp> +#include <armnnUtils/Permute.hpp> + namespace armnn { namespace optimizations @@ -33,11 +36,11 @@ protected: ~ConvertConstDequantisationLayersToConstLayersImpl() = default; private: - static void ReplaceConstDequantisationLayer(Graph& graph, + static void ReplaceConstDequantisationLayer(Graph&, ConstantLayer* constantLayer, DequantizeLayer* dequantizeLayer) { - IgnoreUnused(graph); + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()"; /** * This optimisation is to find situations where a constant set of inputs is being provided to a 
Dequantization * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we @@ -47,31 +50,80 @@ private: TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo(); + bool requiresPermute = false; + + auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0); + if (connection) + { + if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d) + { + /** + * ArmNN does not currently support non-fixed weights or bias + * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in] + * but ArmNN expects the filter's height and width indices to match the input's height + * and width indices so we permute it to OIHW if the DataLayout is NCHW + */ + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to " + "Convolution layer."; + auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer()); + if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW) + { + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to " + "Convolution layer and requires permute on weights. 
"; + requiresPermute = true; + } + } + } + ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1); auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections(); + ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType()) + << "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType()) + << "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType()); + std::vector<float> newValues(outputDequantizeInfo.GetNumElements()); if (constantInfo.GetDataType() == DataType::Float16 && inputDequantizeInfo.GetDataType() == DataType::Float16 && outputDequantizeInfo.GetDataType() == DataType::Float32) { + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32"; armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true), outputDequantizeInfo.GetNumElements(), newValues.data()); } - else if (constantInfo.GetDataType() == DataType::QAsymmS8 && - inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 && + else if (((constantInfo.GetDataType() == DataType::QAsymmS8 + && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8) + || (constantInfo.GetDataType() == DataType::QSymmS8 + && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) && outputDequantizeInfo.GetDataType() == DataType::Float32) { + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32"; ConvertInt8To32(constantLayer->m_LayerOutput->Map(true), outputDequantizeInfo.GetNumElements(), + inputDequantizeInfo.GetQuantizationScale(), + inputDequantizeInfo.GetQuantizationOffset(), newValues.data()); } TensorInfo newInfo = outputDequantizeInfo; newInfo.SetConstant(true); - ConstTensor newInput(newInfo, newValues); - constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput)); + if (requiresPermute) + { + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: 
Permuting the constant data."; + const PermutationVector OHWIToOIHW = {0, 2, 3, 1}; + std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements()); + armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW, + newValues.data(), permutedValues.data(), + GetDataTypeSize(outputDequantizeInfo.GetDataType())); + ConstTensor newInput(newInfo, permutedValues); + constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput)); + } + else + { + ConstTensor newInput(newInfo, newValues); + constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput)); + } // Moves connections in dequantize output to the constant layer. // Dequantize layer will be removed if left unconnected. @@ -95,16 +147,21 @@ private: static void ConvertInt8To32(const void* srcInt8Buffer, size_t numElements, + const float scale, + const int32_t offset, float* dstFloat32Buffer) { ARMNN_ASSERT(srcInt8Buffer != nullptr); ARMNN_ASSERT(dstFloat32Buffer != nullptr); + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale; + ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset; + const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer); for (size_t i = 0; i < numElements; ++i) { - dstFloat32Buffer[i] = pInt8[i]; + dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale; } } |