Diffstat (limited to 'src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp')
 src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp | 71
 1 file changed, 64 insertions(+), 7 deletions(-)
diff --git a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
index 16314dc0d0..27acf78d3e 100644
--- a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
+++ b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
@@ -7,6 +7,9 @@
#include "Optimization.hpp"
#include "NetworkUtils.hpp"
+#include <armnn/Logging.hpp>
+#include <armnnUtils/Permute.hpp>
+
namespace armnn
{
namespace optimizations
@@ -33,11 +36,11 @@ protected:
~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:
- static void ReplaceConstDequantisationLayer(Graph& graph,
+ static void ReplaceConstDequantisationLayer(Graph&,
ConstantLayer* constantLayer,
DequantizeLayer* dequantizeLayer)
{
- IgnoreUnused(graph);
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
/**
* This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
* layer. In this case we don't want the overhead of Dequantizing the values on every inference; instead we
@@ -47,31 +50,80 @@ private:
TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
+ bool requiresPermute = false;
+
+ auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
+ if (connection)
+ {
+ if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
+ {
+ /**
+ * ArmNN does not currently support non-fixed weights or bias
+ * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in]
+ * but ArmNN expects the filter's height and width indices to match the input's height
+ * and width indices so we permute it to OIHW if the DataLayout is NCHW
+ */
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
+ "Convolution layer.";
+ auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
+ if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
+ {
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
+ "Convolution layer and requires permute on weights. ";
+ requiresPermute = true;
+ }
+ }
+ }
+
ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
+ ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
+ << "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
+ << "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());
+
std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
if (constantInfo.GetDataType() == DataType::Float16 &&
inputDequantizeInfo.GetDataType() == DataType::Float16 &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
newValues.data());
}
- else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
- inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
+ else if (((constantInfo.GetDataType() == DataType::QAsymmS8
+ && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
+ || (constantInfo.GetDataType() == DataType::QSymmS8
+ && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
+ inputDequantizeInfo.GetQuantizationScale(),
+ inputDequantizeInfo.GetQuantizationOffset(),
newValues.data());
}
TensorInfo newInfo = outputDequantizeInfo;
newInfo.SetConstant(true);
- ConstTensor newInput(newInfo, newValues);
- constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+ if (requiresPermute)
+ {
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
+ const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
+ std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
+ armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
+ newValues.data(), permutedValues.data(),
+ GetDataTypeSize(outputDequantizeInfo.GetDataType()));
+ ConstTensor newInput(newInfo, permutedValues);
+ constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+ }
+ else
+ {
+ ConstTensor newInput(newInfo, newValues);
+ constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+ }
// Moves connections in dequantize output to the constant layer.
// Dequantize layer will be removed if left unconnected.
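
Aside: the PermutationVector {0, 2, 3, 1} added above maps each source dimension d of the OHWI filter to destination dimension mapping[d], i.e. O stays at position 0 while H, W and I move to positions 2, 3 and 1, which yields OIHW. A minimal standalone sketch of that reordering with plain loops (made-up 2x2x2x3 shape, no ArmNN dependency):

#include <array>
#include <cstddef>
#include <iostream>

int main()
{
    // Hypothetical OHWI filter: O=2, H=2, W=2, I=3, filled with 0..23.
    const std::size_t O = 2, H = 2, W = 2, I = 3;
    std::array<float, O * H * W * I> src{};
    for (std::size_t n = 0; n < src.size(); ++n)
    {
        src[n] = static_cast<float>(n);
    }

    // OIHW destination: element [o][i][h][w] is read from src[o][h][w][i].
    std::array<float, O * I * H * W> dst{};
    for (std::size_t o = 0; o < O; ++o)
        for (std::size_t h = 0; h < H; ++h)
            for (std::size_t w = 0; w < W; ++w)
                for (std::size_t i = 0; i < I; ++i)
                {
                    dst[((o * I + i) * H + h) * W + w] = src[((o * H + h) * W + w) * I + i];
                }

    // Neighbouring OIHW elements along W stride over I in the source:
    std::cout << dst[0] << " " << dst[1] << std::endl; // prints 0 3
    return 0;
}
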
@@ -95,16 +147,21 @@ private:
static void ConvertInt8To32(const void* srcInt8Buffer,
size_t numElements,
+ const float scale,
+ const int32_t offset,
float* dstFloat32Buffer)
{
ARMNN_ASSERT(srcInt8Buffer != nullptr);
ARMNN_ASSERT(dstFloat32Buffer != nullptr);
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
+ ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;
+
const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
for (size_t i = 0; i < numElements; ++i)
{
- dstFloat32Buffer[i] = pInt8[i];
+ dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
}
}
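
Aside: the new loop body applies the standard affine dequantization rule, real = scale * (quantized - offset); the previous code simply cast the int8 value to float and ignored the quantization parameters. A quick standalone check with made-up values:

#include <cstdint>
#include <iostream>

int main()
{
    const float scale = 0.5f;   // hypothetical quantization scale
    const int32_t offset = 10;  // hypothetical zero point
    const int8_t quantized[] = {10, 12, 20};

    for (int8_t q : quantized)
    {
        // (q - offset) * scale: 10 -> 0, 12 -> 1, 20 -> 5
        std::cout << static_cast<float>(q - offset) * scale << std::endl;
    }
    return 0;
}
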