author     Teresa Charlin <teresa.charlinreyes@arm.com>    2022-05-15 14:07:05 +0100
committer  Ryan OShea <ryan.oshea3@arm.com>                2022-05-19 11:06:53 +0100
commit     5841c740ba6bc6c8c3e96d24156dc47907af6430 (patch)
tree       fef7e7068a613e34491a7c846dda9b19b61e3a8f
parent     21fe06fad6760a0d453f2de9c8dd790983ae940c (diff)
download   armnn-5841c740ba6bc6c8c3e96d24156dc47907af6430.tar.gz
IVGCVSW-6455 Support Const + Dequantize layer and optimize it.
* Support Float16 as input to Dequantize layer
* Add Optimization to substitute Const+Dequantize layers with Const layer

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I58bb7e3871ca480c7b6fca93c4efb2de84e09e64
Signed-off-by: David <david.monahan@arm.com>
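For context on what the new pass does: instead of dequantizing the same constant weights on every inference, the optimization converts the constant's payload once at network-optimization time and removes the now-redundant Dequantize layer. Below is a minimal standalone sketch of that folding step, assuming the textbook affine dequantization formula; the names are illustrative, not the Arm NN API, and note that the patch's own ConvertInt8To32 helper further down performs a plain cast.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Fold a quantized constant into a Float32 constant once, ahead of inference,
    // so the runtime graph no longer needs a Dequantize layer for it.
    // real = scale * (quantized - zeroPoint)   (standard affine dequantization)
    std::vector<float> FoldConstDequantize(const std::vector<int8_t>& quantized,
                                           float scale,
                                           int32_t zeroPoint)
    {
        std::vector<float> dequantized(quantized.size());
        for (std::size_t i = 0; i < quantized.size(); ++i)
        {
            dequantized[i] = scale * (static_cast<float>(quantized[i]) -
                                      static_cast<float>(zeroPoint));
        }
        return dequantized; // becomes the Constant layer's payload; Dequantize can be dropped
    }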
-rw-r--r--  CMakeLists.txt                                                                        1
-rw-r--r--  src/armnn/Network.cpp                                                                 1
-rw-r--r--  src/armnn/optimizations/All.hpp                                                       1
-rw-r--r--  src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp           119
-rw-r--r--  src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp  105
-rw-r--r--  src/backends/backendsCommon/WorkloadData.cpp                                         15
6 files changed, 237 insertions, 5 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e67c389f3d..903f06c86c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -528,6 +528,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/ObservableTest.cpp
src/armnn/test/OptimizerTests.cpp
src/armnn/test/optimizations/AddBroadcastReshapeLayerTests.cpp
+ src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp
src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp
src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 6a646d3cc8..9da28ceeea 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1739,6 +1739,7 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
FuseBatchNormIntoConvolution2DFloat16(),
FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
FuseBatchNormIntoDepthwiseConvolution2DFloat16(),
+ ConvertConstDequantisationLayersToConstLayers(),
RedirectMembersToConstantInputs()));
// If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 38c4ac9462..e4a1f33e08 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -7,6 +7,7 @@
#include "AddBroadcastReshapeLayer.hpp"
#include "AddDebug.hpp"
#include "ConvertConstants.hpp"
+#include "ConvertConstDequantisationLayersToConstLayers.hpp"
#include "ConvertFp32NetworkToBf16.hpp"
#include "ConvertFp32NetworkToFp16.hpp"
#include "FoldPadIntoLayer2d.hpp"
diff --git a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
new file mode 100644
index 0000000000..16314dc0d0
--- /dev/null
+++ b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
@@ -0,0 +1,119 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "Optimization.hpp"
+#include "NetworkUtils.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+class ConvertConstDequantisationLayersToConstLayersImpl
+{
+public:
+ void Run(Graph& graph, InputSlot& connection) const
+ {
+ Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+ Layer& child = connection.GetOwningLayer();
+
+ ARMNN_ASSERT(base.GetType() == LayerType::Constant);
+ ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);
+
+ ReplaceConstDequantisationLayer(graph,
+ PolymorphicDowncast<ConstantLayer*>(&base),
+ PolymorphicDowncast<DequantizeLayer*>(&child));
+
+ }
+protected:
+ ConvertConstDequantisationLayersToConstLayersImpl() = default;
+ ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
+private:
+
+ static void ReplaceConstDequantisationLayer(Graph& graph,
+ ConstantLayer* constantLayer,
+ DequantizeLayer* dequantizeLayer)
+ {
+ IgnoreUnused(graph);
+ /**
+ * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
+ * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
+ * want to Dequantize them once and store them in a Const layer to be used every time as they will not change.
+ */
+ TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
+ TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+ TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
+
+ ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
+ auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
+
+ std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
+ if (constantInfo.GetDataType() == DataType::Float16 &&
+ inputDequantizeInfo.GetDataType() == DataType::Float16 &&
+ outputDequantizeInfo.GetDataType() == DataType::Float32)
+ {
+ armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
+ outputDequantizeInfo.GetNumElements(),
+ newValues.data());
+ }
+ else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
+ inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
+ outputDequantizeInfo.GetDataType() == DataType::Float32)
+ {
+ ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
+ outputDequantizeInfo.GetNumElements(),
+ newValues.data());
+ }
+
+ TensorInfo newInfo = outputDequantizeInfo;
+ newInfo.SetConstant(true);
+ ConstTensor newInput(newInfo, newValues);
+ constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+
+ // Moves connections in dequantize output to the constant layer.
+ // Dequantize layer will be removed if left unconnected.
+ dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
+
+ // Update the constant layer's output tensor info
+ constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+ ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
+
+ // Set isConstant to true in the tensor infos of all inputs that constantLayer is now connected to
+ for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
+ {
+ auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
+ .GetConnectedOutputSlot()->GetTensorInfo();
+ info.SetConstant();
+ constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
+ .GetConnectedOutputSlot()->SetTensorInfo(info);
+ }
+ }
+
+
+static void ConvertInt8To32(const void* srcInt8Buffer,
+ size_t numElements,
+ float* dstFloat32Buffer)
+{
+ ARMNN_ASSERT(srcInt8Buffer != nullptr);
+ ARMNN_ASSERT(dstFloat32Buffer != nullptr);
+
+ const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
+
+ for (size_t i = 0; i < numElements; ++i)
+ {
+ dstFloat32Buffer[i] = pInt8[i];
+ }
+}
+
+};
+
+using ConvertConstDequantisationLayersToConstLayers
+ = OptimizeForConnection<ConstantLayer,
+ DequantizeLayer,
+ ConvertConstDequantisationLayersToConstLayersImpl>;
+
+} // namespace optimizations
+} // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
new file mode 100644
index 0000000000..926ac2d26d
--- /dev/null
+++ b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
@@ -0,0 +1,105 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "LayersFwd.hpp"
+#include <Network.hpp>
+#include <TestUtils.hpp>
+#include <doctest/doctest.h>
+#include <armnn/backends/TensorHandle.hpp>
+#include <Optimizer.hpp>
+
+TEST_SUITE("Optimizer")
+{
+using namespace armnn;
+using namespace armnn::optimizations;
+
+TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
+{
+ Graph graph;
+ const unsigned int shape[] = {1, 2, 2, 3};
+
+ const TensorInfo constTensorInfo(4, shape, DataType::Float16, 1.0, 0, true);
+ const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
+
+ ConstantLayer *constantLayer = graph.AddLayer<ConstantLayer>("constant");
+ std::vector<float> constantValues(constTensorInfo.GetNumElements(), 4.5f);
+ ConstTensor constTensor(constTensorInfo, constantValues.data());
+ constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+ constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+ DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+ dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
+
+ OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connect up constant -> dequantize -> output
+ constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
+ dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ auto checkConstantFloat16 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::Float16);
+ };
+ auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::Float32);
+ };
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ checkConstantFloat16,
+ &IsLayerOfType<DequantizeLayer>,
+ &IsLayerOfType<OutputLayer>));
+
+ armnn::Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ checkConstantFloat32,
+ &IsLayerOfType<OutputLayer>));
+}
+
+TEST_CASE("ConvertConstInt8DequantizeToConstFloat32")
+{
+ Graph graph;
+ const unsigned int shape[] = {1, 2, 2, 3};
+
+ const TensorInfo constTensorInfo(4, shape, DataType::QAsymmS8, 1.0, 0, true);
+ const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
+
+ ConstantLayer *constantLayer = graph.AddLayer<ConstantLayer>("constant");
+ std::vector<int8_t> constantValues(constTensorInfo.GetNumElements(), 5);
+ ConstTensor constTensor(constTensorInfo, constantValues.data());
+ constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+ constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+ DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+ dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
+
+ OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connect up constant -> dequantize -> output
+ constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
+ dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ auto checkConstantQAsymmS8 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::QAsymmS8);
+ };
+ auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::Float32);
+ };
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ checkConstantQAsymmS8,
+ &IsLayerOfType<DequantizeLayer>,
+ &IsLayerOfType<OutputLayer>));
+
+ armnn::Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ checkConstantFloat32,
+ &IsLayerOfType<OutputLayer>));
+}
+}
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 2194b487d3..606821b5e5 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -2903,19 +2903,24 @@ void DequantizeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0];
const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
- if (!IsQuantizedType(inputTensorInfo.GetDataType()))
+ std::vector<DataType> inputSupportedTypes =
{
- throw InvalidArgumentException(descriptorName + ": Input to dequantize layer must be quantized type.");
- }
+ DataType::QAsymmS8,
+ DataType::QAsymmU8,
+ DataType::QSymmS8,
+ DataType::QSymmS16,
+ DataType::Float16
+ };
+ ValidateDataTypes(inputTensorInfo, inputSupportedTypes, descriptorName);
- std::vector<DataType> supportedTypes =
+ std::vector<DataType> outputSupportedTypes =
{
DataType::BFloat16,
DataType::Float32,
DataType::Float16
};
- ValidateDataTypes(outputTensorInfo, supportedTypes, descriptorName);
+ ValidateDataTypes(outputTensorInfo, outputSupportedTypes, descriptorName);
}
void MergeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
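For readers skimming the WorkloadData.cpp hunk above: the Dequantize input validation now checks the input against an explicit allow-list (the quantized types plus Float16) rather than the blanket IsQuantizedType() test, which is what lets a Float16 constant feed a Dequantize layer. A minimal sketch of this allow-list style of check, using illustrative standalone types rather than Arm NN's ValidateDataTypes:

    #include <algorithm>
    #include <stdexcept>
    #include <string>
    #include <vector>

    enum class DataType { Float16, Float32, BFloat16, QAsymmS8, QAsymmU8, QSymmS8, QSymmS16 };

    // Throw if the actual data type is not in the layer's supported list.
    void CheckSupportedType(DataType actual,
                            const std::vector<DataType>& supported,
                            const std::string& descriptorName)
    {
        if (std::find(supported.begin(), supported.end(), actual) == supported.end())
        {
            throw std::invalid_argument(descriptorName + ": data type is not supported.");
        }
    }

    // Usage mirroring the patched Dequantize validation:
    //   CheckSupportedType(inputType,  {DataType::QAsymmS8, DataType::QAsymmU8, DataType::QSymmS8,
    //                                   DataType::QSymmS16, DataType::Float16}, "DequantizeQueueDescriptor");
    //   CheckSupportedType(outputType, {DataType::BFloat16, DataType::Float32,
    //                                   DataType::Float16}, "DequantizeQueueDescriptor");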