author     Sadik Armagan <sadik.armagan@arm.com>    2022-08-03 11:27:05 +0100
committer  Cathal Corbett <cathal.corbett@arm.com>  2022-08-12 10:14:15 +0100
commit     1e22d965034d8ca7964bd2be095adef67ca287e3 (patch)
tree       55ced2f4a4b3f47374dfb0d5b7a4ff326b0a203c
parent     560c393829bb24b60715849c26b21c58a0d92c32 (diff)
download   armnn-1e22d965034d8ca7964bd2be095adef67ca287e3.tar.gz
IVGCVSW-6954 'Arm NN SL Improvements'
* Move the Conv2D and DepthwiseConv2D validation to Optimization level when the
  weights and tensors are provided as constant inputs
* Take offset and scale values into account when doing INT8 to FP32 dequantization

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I1f81f15640395ac041923b10dbe9151159715117
-rw-r--r--  shim/sl/canonical/ConversionUtils.cpp                                      | 46
-rw-r--r--  shim/sl/canonical/ConversionUtils.hpp                                      |  4
-rw-r--r--  shim/sl/canonical/Converter.cpp                                            | 77
-rw-r--r--  src/armnn/Network.cpp                                                      | 20
-rw-r--r--  src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp | 71
5 files changed, 168 insertions(+), 50 deletions(-)
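The second bullet of the commit message is the per-element dequantization fix: the old helper copied raw INT8 values straight into the FP32 buffer, while the patched code applies the tensor's quantization parameters. A minimal standalone sketch of that affine dequantization, assuming the usual convention real = scale * (quantized - offset) and using hypothetical names (it is not part of the patch itself):

    #include <cstddef>
    #include <cstdint>

    // Dequantize a buffer of signed 8-bit values into 32-bit floats.
    void DequantizeInt8ToFloat32(const int8_t* src, float* dst, size_t numElements,
                                 float scale, int32_t offset)
    {
        for (size_t i = 0; i < numElements; ++i)
        {
            // Same formula the patched ConvertInt8To32 applies further down in this diff.
            dst[i] = static_cast<float>(src[i] - offset) * scale;
        }
    }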
diff --git a/shim/sl/canonical/ConversionUtils.cpp b/shim/sl/canonical/ConversionUtils.cpp
index 96a8ddca6a..f48af32e21 100644
--- a/shim/sl/canonical/ConversionUtils.cpp
+++ b/shim/sl/canonical/ConversionUtils.cpp
@@ -67,6 +67,11 @@ void LayerInputHandle::SanitizeQuantizationScale(LayerInputHandle& weight, Layer
}
}
+armnn::IOutputSlot* LayerInputHandle::GetOutputSlot() const
+{
+ return m_OutputSlot;
+}
+
ConstTensorPin::ConstTensorPin(bool optional)
: m_Optional(optional)
{}
@@ -276,17 +281,6 @@ LayerInputHandle ConvertToLayerInputHandle(const Operation& operation,
case OperandLifeTime::CONSTANT_REFERENCE:
{
auto constantTensorDataType = operandTensorInfo.GetDataType();
- if (inputHandle)
- {
- if ((inputHandle->GetTensorInfo().GetDataType() == armnn::DataType::Float32
- || inputHandle->GetTensorInfo().GetDataType() == armnn::DataType::Float16)
- && (operandTensorInfo.GetDataType() == armnn::DataType::QAsymmU8
- || operandTensorInfo.GetDataType() == armnn::DataType::QAsymmS8))
- {
- constantTensorDataType = inputHandle->GetTensorInfo().GetDataType();
- }
- }
-
// The tensor has an already known constant value, and can be converted into an ArmNN Constant layer.
ConstTensorPin tensorPin = ConvertOperandToConstTensorPin(*operand,
model,
@@ -1029,4 +1023,34 @@ bool SetupAndTrackLayerOutputSlot(const Operation& operation,
return true;
}
+bool IsConnectedToDequantize(armnn::IOutputSlot* ioutputSlot)
+{
+ VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize()";
+ if (!ioutputSlot)
+ {
+ return false;
+ }
+ VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() ioutputSlot is valid.";
+ // Find the connections and layers.
+ armnn::IConnectableLayer& owningLayer = ioutputSlot->GetOwningIConnectableLayer();
+ if (owningLayer.GetType() == armnn::LayerType::Dequantize)
+ {
+ VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() connected to Dequantize Layer.";
+ armnn::IInputSlot& inputSlot = owningLayer.GetInputSlot(0);
+ armnn::IOutputSlot* connection = inputSlot.GetConnection();
+ if (connection)
+ {
+ VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() Dequantize Layer has a connection.";
+ armnn::IConnectableLayer& connectedLayer =
+ connection->GetOwningIConnectableLayer();
+ if (connectedLayer.GetType() == armnn::LayerType::Constant)
+ {
+ VLOG(DRIVER) << "ConversionUtils::IsConnectedToDequantize() Dequantize Layer connected to Constant";
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
} // namespace armnn_driver
diff --git a/shim/sl/canonical/ConversionUtils.hpp b/shim/sl/canonical/ConversionUtils.hpp
index 8058bcb379..beee00d11a 100644
--- a/shim/sl/canonical/ConversionUtils.hpp
+++ b/shim/sl/canonical/ConversionUtils.hpp
@@ -79,6 +79,8 @@ public:
void SanitizeQuantizationScale(LayerInputHandle& weight, LayerInputHandle& input);
+ armnn::IOutputSlot* GetOutputSlot() const;
+
private:
armnn::IOutputSlot* m_OutputSlot;
bool m_Valid;
@@ -1012,4 +1014,6 @@ ConstTensorPin DequantizeAndMakeConstTensorPin(const Operation& operation,
size_t operandIndex,
bool optional = false);
+bool IsConnectedToDequantize(armnn::IOutputSlot* ioutputSlot);
+
} // namespace armnn_driver
diff --git a/shim/sl/canonical/Converter.cpp b/shim/sl/canonical/Converter.cpp
index fc983dc081..b50b0a9397 100644
--- a/shim/sl/canonical/Converter.cpp
+++ b/shim/sl/canonical/Converter.cpp
@@ -998,9 +998,20 @@ bool Converter::ConvertConv2d(const Operation& operation, const Model& model, Co
desc.m_BiasEnabled = true;
Optional<TensorInfo> biases(biasInfo);
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ bool requiresValidation = true;
+ const Operand* weightsOperand = GetInputOperand(operation, 1, model);
+ const Operand* biasOperand = GetInputOperand(operation, 2, model);
+ if (IsConnectedToDequantize(weightsInput.GetOutputSlot())
+ || IsConnectedToDequantize(biasInput.GetOutputSlot()))
{
+ // Do not require validation for now. The ConvertConstDequantisationLayersToConstLayers
+ // optimization step will convert these layers to Constant layers, and layer support is
+ // validated at the end of the optimization.
+ requiresValidation = false;
+ VLOG(DRIVER) << "Converter::ConvertConv2d(): Weights and Biases are as INPUTS.";
+ }
+
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported) {
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConvolution2dSupported,
data.m_Backends,
@@ -1012,18 +1023,23 @@ bool Converter::ConvertConv2d(const Operation& operation, const Model& model, Co
biases);
};
- if(!IsDynamicTensor(outputInfo))
+ if (requiresValidation)
{
- validateFunc(outputInfo, isSupported);
- }
- else
- {
- isSupported = AreDynamicTensorsSupported();
- }
+ VLOG(DRIVER) << "Converter::ConvertConv2d(): Requires Validation!";
+ bool isSupported = false;
+ if (!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
- if (!isSupported)
- {
- return false;
+ if (!isSupported)
+ {
+ return false;
+ }
}
armnn::IConnectableLayer* startLayer = data.m_Network->AddConvolution2dLayer(desc);
@@ -1231,9 +1247,17 @@ bool Converter::ConvertDepthwiseConv2d(const Operation& operation, const Model&
desc.m_BiasEnabled = true;
Optional<TensorInfo> biases(biasInfo);
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ bool requiresValidation = true;
+ if (IsConnectedToDequantize(weightsInput.GetOutputSlot()) || IsConnectedToDequantize(biasInput.GetOutputSlot()))
{
+ // Do not require validation for now. The ConvertConstDequantisationLayersToConstLayers
+ // optimization step will convert these layers to Constant layers, and layer support is
+ // validated at the end of the optimization.
+ requiresValidation = false;
+ VLOG(DRIVER) << "Converter::ConvertDepthwiseConv2d(): Weights and Biases are as INPUTS.";
+ }
+
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported) {
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDepthwiseConvolutionSupported,
data.m_Backends,
@@ -1245,18 +1269,23 @@ bool Converter::ConvertDepthwiseConv2d(const Operation& operation, const Model&
biases);
};
- if(!IsDynamicTensor(outputInfo))
+ if (requiresValidation)
{
- validateFunc(outputInfo, isSupported);
- }
- else
- {
- isSupported = AreDynamicTensorsSupported();
- }
+ VLOG(DRIVER) << "Converter::ConvertDepthwiseConv2d(): Requires Validation!";
+ bool isSupported = false;
+ if (!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
- if (!isSupported)
- {
- return false;
+ if (!isSupported)
+ {
+ return false;
+ }
}
armnn::IConnectableLayer* startLayer = data.m_Network->AddDepthwiseConvolution2dLayer(desc);
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 22fc0a3ed4..c4869fae04 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -2058,16 +2058,18 @@ IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescrip
auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);
// Add a constant layer for weights
ConstantLayer* weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights");
- weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);
- layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
+ auto weightsTensorHandle = std::make_shared<ScopedTensorHandle>(weights);
+ weightsLayer->m_LayerOutput = weightsTensorHandle;
+ layer->m_Weight = weightsTensorHandle;
weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());
weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
// Add a constant layer for biases
if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled)
{
ConstantLayer* biasLayer = m_Graph->AddLayer<ConstantLayer>("Bias");
- biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value());
- layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
+ auto biasTensorHandle = std::make_shared<ScopedTensorHandle>(biases.value());
+ biasLayer->m_LayerOutput = biasTensorHandle;
+ layer->m_Bias = biasTensorHandle;
biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());
biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
}
@@ -2113,8 +2115,9 @@ IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer(
// Add a constant layer for weights
ConstantLayer* weightsLayer = m_Graph->AddLayer<ConstantLayer>("Weights");
- weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);
- layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
+ auto weightsTensorHandle = std::make_shared<ScopedTensorHandle>(weights);
+ weightsLayer->m_LayerOutput = weightsTensorHandle;
+ layer->m_Weight = weightsTensorHandle;
weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo());
weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
@@ -2123,8 +2126,9 @@ IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer(
if (biases.has_value() && convolution2dDescriptor.m_BiasEnabled)
{
ConstantLayer* biasLayer = m_Graph->AddLayer<ConstantLayer>("Bias");
- biasLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(biases.value());
- layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
+ auto biasTensorHandle = std::make_shared<ScopedTensorHandle>(biases.value());
+ biasLayer->m_LayerOutput = biasTensorHandle;
+ layer->m_Bias = biasTensorHandle;
biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo());
biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
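The Network.cpp hunks above stop allocating two independent ScopedTensorHandle objects (one for the ConstantLayer output, one cached on the convolution layer) and instead share a single handle between both owners, so the weight and bias data exist only once. A minimal standalone sketch of that ownership pattern, using a placeholder payload type and hypothetical names rather than the real ScopedTensorHandle:

    #include <cassert>
    #include <memory>
    #include <vector>

    struct FakeTensorHandle { std::vector<float> data; };  // stand-in for ScopedTensorHandle

    int main()
    {
        // Before the patch: two make_shared calls produced two copies of the weight data.
        // After the patch: one make_shared call is shared by both owners.
        auto weightsHandle = std::make_shared<FakeTensorHandle>(FakeTensorHandle{{1.f, 2.f, 3.f}});
        std::shared_ptr<FakeTensorHandle> constantLayerOutput = weightsHandle;  // weightsLayer->m_LayerOutput
        std::shared_ptr<FakeTensorHandle> convWeightCache     = weightsHandle;  // layer->m_Weight
        assert(constantLayerOutput.get() == convWeightCache.get());  // same underlying tensor
        assert(weightsHandle.use_count() == 3);                      // one allocation, three references
        return 0;
    }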
diff --git a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
index 16314dc0d0..27acf78d3e 100644
--- a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
+++ b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
@@ -7,6 +7,9 @@
#include "Optimization.hpp"
#include "NetworkUtils.hpp"
+#include <armnn/Logging.hpp>
+#include <armnnUtils/Permute.hpp>
+
namespace armnn
{
namespace optimizations
@@ -33,11 +36,11 @@ protected:
~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:
- static void ReplaceConstDequantisationLayer(Graph& graph,
+ static void ReplaceConstDequantisationLayer(Graph&,
ConstantLayer* constantLayer,
DequantizeLayer* dequantizeLayer)
{
- IgnoreUnused(graph);
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
/**
* This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
* layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
@@ -47,31 +50,80 @@ private:
TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
+ bool requiresPermute = false;
+
+ auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
+ if (connection)
+ {
+ if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
+ {
+ /**
+ * ArmNN does not currently support non-fixed weights or bias
+ * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in]
+ * but ArmNN expects the filter's height and width indices to match the input's height
+ * and width indices so we permute it to OIHW if the DataLayout is NCHW
+ */
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
+ "Convolution layer.";
+ auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
+ if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
+ {
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
+ "Convolution layer and requires permute on weights. ";
+ requiresPermute = true;
+ }
+ }
+ }
+
ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
+ ARMNN_LOG(info) << "constantInfo datatype: " << armnn::GetDataTypeName(constantInfo.GetDataType())
+ << " inputDequantizeInfo datatype: " << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
+ << " outputDequantizeInfo datatype: " << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());
+
std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
if (constantInfo.GetDataType() == DataType::Float16 &&
inputDequantizeInfo.GetDataType() == DataType::Float16 &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
newValues.data());
}
- else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
- inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
+ else if (((constantInfo.GetDataType() == DataType::QAsymmS8
+ && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
+ || (constantInfo.GetDataType() == DataType::QSymmS8
+ && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
+ inputDequantizeInfo.GetQuantizationScale(),
+ inputDequantizeInfo.GetQuantizationOffset(),
newValues.data());
}
TensorInfo newInfo = outputDequantizeInfo;
newInfo.SetConstant(true);
- ConstTensor newInput(newInfo, newValues);
- constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+ if (requiresPermute)
+ {
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
+ const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
+ std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
+ armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
+ newValues.data(), permutedValues.data(),
+ GetDataTypeSize(outputDequantizeInfo.GetDataType()));
+ ConstTensor newInput(newInfo, permutedValues);
+ constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+ }
+ else
+ {
+ ConstTensor newInput(newInfo, newValues);
+ constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+ }
// Moves connections in dequantize output to the constant layer.
// Dequantize layer will be removed if left unconnected.
@@ -95,16 +147,21 @@ private:
static void ConvertInt8To32(const void* srcInt8Buffer,
size_t numElements,
+ const float scale,
+ const int32_t offset,
float* dstFloat32Buffer)
{
ARMNN_ASSERT(srcInt8Buffer != nullptr);
ARMNN_ASSERT(dstFloat32Buffer != nullptr);
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;
+
const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
for (size_t i = 0; i < numElements; ++i)
{
- dstFloat32Buffer[i] = pInt8[i];
+ dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
}
}
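The OHWIToOIHW vector used in the permute branch above relies on ArmNN's PermutationVector semantics, which, as I read them, give for each source dimension its position in the destination. A small self-contained sketch of what that mapping does to a filter shape (the example dimensions are made up):

    #include <array>
    #include <cstdio>

    int main()
    {
        // Assumed semantics: mappings[i] = destination index of source dimension i.
        const std::array<unsigned int, 4> ohwiShape = {8, 3, 3, 16};  // [O, H, W, I]
        const std::array<unsigned int, 4> mappings  = {0, 2, 3, 1};   // the OHWIToOIHW vector
        std::array<unsigned int, 4> oihwShape{};
        for (unsigned int i = 0; i < 4; ++i)
        {
            oihwShape[mappings[i]] = ohwiShape[i];
        }
        // Prints [8, 16, 3, 3], i.e. [O, I, H, W], the layout an NCHW convolution expects.
        std::printf("[%u, %u, %u, %u]\n", oihwShape[0], oihwShape[1], oihwShape[2], oihwShape[3]);
        return 0;
    }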