aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/optimizations
diff options
context:
space:
mode:
author telsoa01 <telmo.soares@arm.com> 2018-08-31 09:22:23 +0100
committer telsoa01 <telmo.soares@arm.com> 2018-08-31 09:22:23 +0100
commit c577f2c6a3b4ddb6ba87a882723c53a248afbeba (patch)
tree bd7d4c148df27f8be6649d313efb24f536b7cf34 /src/armnn/optimizations
parent 4c7098bfeab1ffe1cdc77f6c15548d3e73274746 (diff)
download armnn-c577f2c6a3b4ddb6ba87a882723c53a248afbeba.tar.gz
Release 18.08
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r-- src/armnn/optimizations/All.hpp 3
-rw-r--r-- src/armnn/optimizations/ConvertConstants.hpp 98
-rw-r--r-- src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp 80
-rw-r--r-- src/armnn/optimizations/MovePermuteUp.hpp 10
-rw-r--r-- src/armnn/optimizations/Optimization.hpp 7
-rw-r--r-- src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp 10
-rw-r--r-- src/armnn/optimizations/OptimizeInverseConversions.hpp 44
-rw-r--r-- src/armnn/optimizations/PermuteAsReshape.hpp 2
-rw-r--r-- src/armnn/optimizations/SquashEqualSiblings.hpp 2
9 files changed, 241 insertions, 15 deletions
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 70f78d44af..0603d44d31 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -4,8 +4,11 @@
//
#pragma once
+#include "ConvertConstants.hpp"
#include "OptimizeInversePermutes.hpp"
#include "PermuteAsReshape.hpp"
#include "OptimizeConsecutiveReshapes.hpp"
#include "SquashEqualSiblings.hpp"
#include "MovePermuteUp.hpp"
+#include "OptimizeInverseConversions.hpp"
+#include "ConvertFp32NetworkToFp16.hpp"
diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp
new file mode 100644
index 0000000000..d2dd650665
--- /dev/null
+++ b/src/armnn/optimizations/ConvertConstants.hpp
@@ -0,0 +1,98 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "Optimization.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "Half.hpp"
+#include "FloatingPointConverter.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+/// Converter policy for ConvertConstants: swaps a Float16 constant tensor for a Float32 equivalent.
+struct Float16ToFloat32
+{
+    /// Re-seats @p handle on a newly built Float32 tensor holding the converted values.
+    /// A handle whose tensor is not DataType::Float16 is left untouched.
+    static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle)
+    {
+        const TensorInfo& halfInfo = handle->GetTensorInfo();
+        if (halfInfo.GetDataType() != DataType::Float16)
+        {
+            return;
+        }
+
+        const auto elementCount = halfInfo.GetNumElements();
+        std::vector<float> floatValues(elementCount);
+        armnnUtils::FloatingPointConverter::ConvertFloat16To32(handle->GetTensor<Half>(),
+                                                               elementCount,
+                                                               floatValues.data());
+
+        // Same shape as before, only the data type changes.
+        const TensorInfo floatInfo(halfInfo.GetShape(), DataType::Float32);
+        ConstTensor floatTensor(floatInfo, floatValues);
+
+        // NOTE(review): floatValues goes out of scope on return, so ScopedCpuTensorHandle presumably takes
+        // its own copy of the data - confirm.
+        handle.reset(new ScopedCpuTensorHandle(floatTensor));
+    }
+};
+
+/// Converter policy for ConvertConstants: swaps a Float32 constant tensor for a Float16 equivalent.
+struct Float32ToFloat16
+{
+    /// Re-seats @p handle on a newly built Float16 tensor holding the converted values.
+    /// A handle whose tensor is not DataType::Float32 is left untouched.
+    static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle)
+    {
+        const TensorInfo& floatInfo = handle->GetTensorInfo();
+        if (floatInfo.GetDataType() != DataType::Float32)
+        {
+            return;
+        }
+
+        const auto elementCount = floatInfo.GetNumElements();
+        std::vector<Half> halfValues(elementCount);
+        armnnUtils::FloatingPointConverter::ConvertFloat32To16(handle->GetTensor<float>(),
+                                                               elementCount,
+                                                               halfValues.data());
+
+        // Same shape as before, only the data type changes.
+        const TensorInfo halfInfo(floatInfo.GetShape(), DataType::Float16);
+        ConstTensor halfTensor(halfInfo, halfValues);
+
+        // NOTE(review): halfValues goes out of scope on return, so ScopedCpuTensorHandle presumably takes
+        // its own copy of the data - confirm.
+        handle.reset(new ScopedCpuTensorHandle(halfTensor));
+    }
+};
+
+/// Optimization that applies a conversion to the constant tensors of selected layers.
+/// @tparam Converter Policy type whose static Func is passed to Layer::OperateOnConstantTensors().
+/// @tparam Predicate Policy type whose static Test(layer) decides whether a layer is processed.
+template<typename Converter, typename Predicate>
+class ConvertConstants : public Optimization
+{
+public:
+    ConvertConstants() = default;
+    ConvertConstants(const ConvertConstants&) = default;
+    virtual ~ConvertConstants() = default;
+
+    /// Runs the converter over the constant tensors of @p layer if the predicate accepts it.
+    /// The @p graph argument is not used by this optimization.
+    void Run(Graph& graph, Layer& layer) const override
+    {
+        if (!Predicate::Test(layer))
+        {
+            return;
+        }
+
+        layer.OperateOnConstantTensors(Converter::Func);
+    }
+};
+
+/// Predicate policy for ConvertConstants: accepts layers whose data type is Float32.
+struct IsFloat32Layer
+{
+    /// @return true when layer.GetDataType() equals DataType::Float32.
+    static bool Test(const Layer& layer)
+    {
+        const auto layerDataType = layer.GetDataType();
+        return layerDataType == DataType::Float32;
+    }
+};
+
+/// Predicate policy for ConvertConstants: accepts layers whose data type is Float16.
+struct IsFloat16Layer
+{
+    /// @return true when layer.GetDataType() equals DataType::Float16.
+    static bool Test(const Layer& layer)
+    {
+        const auto layerDataType = layer.GetDataType();
+        return layerDataType == DataType::Float16;
+    }
+};
+
+using ConvertConstantsHalfToFloat = ConvertConstants<Float16ToFloat32, IsFloat32Layer>; // Fp16 constants -> Fp32 on Float32 layers.
+using ConvertConstantsFloatToHalf = ConvertConstants<Float32ToFloat16, IsFloat16Layer>; // Fp32 constants -> Fp16 on Float16 layers.
+
+} //namespace optimizations
+} //namespace armnn
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
new file mode 100644
index 0000000000..a4df05c18a
--- /dev/null
+++ b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "Optimization.hpp"
+#include "NetworkUtils.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+/// Per-layer implementation of the Fp32NetworkToFp16Converter optimization.
+/// Float32 Input and Output layers get conversion layers inserted next to them, existing conversion
+/// layers are skipped, and every other layer has its Float32 slot tensor infos changed to Float16.
+class ConvertFp32NetworkToFp16Impl
+{
+public:
+
+    /// Applies the conversion step matching the type of @p layer.
+    void Run(Graph& graph, Layer& layer) const
+    {
+        switch (layer.GetType())
+        {
+            case LayerType::Input:
+                // A Float32 input keeps its type: a ConvertFloat32ToFloat16 layer is added
+                // after each of its outputs instead.
+                if (layer.GetDataType() == DataType::Float32)
+                {
+                    InsertConvertFp32ToFp16LayersAfter(graph, layer);
+                }
+                break;
+
+            case LayerType::Output:
+                // A Float32 output keeps its type: a ConvertFloat16ToFloat32 layer is added
+                // before each of its inputs instead.
+                if (layer.GetDataType() == DataType::Float32)
+                {
+                    InsertConvertFp16ToFp32LayersBefore(graph, layer);
+                }
+                break;
+
+            case LayerType::ConvertFp32ToFp16:
+            case LayerType::ConvertFp16ToFp32:
+                // Conversion layers are left exactly as they are.
+                break;
+
+            default:
+                ChangeFloat32SlotsToFloat16(layer);
+                break;
+        }
+    }
+
+protected:
+    ConvertFp32NetworkToFp16Impl() = default;
+    ~ConvertFp32NetworkToFp16Impl() = default;
+
+private:
+    /// Changes to Float16 every Float32 tensor info on the connections feeding @p layer
+    /// (except those coming from an Input layer) and on the output slots of @p layer.
+    static void ChangeFloat32SlotsToFloat16(Layer& layer)
+    {
+        for (auto inputIt = layer.BeginInputSlots(); inputIt != layer.EndInputSlots(); ++inputIt)
+        {
+            // NOTE(review): the connected output slot is dereferenced without a null check, so every
+            // input slot is assumed to be connected at this point - confirm.
+            Layer& producer = inputIt->GetConnectedOutputSlot()->GetOwningLayer();
+            if (producer.GetType() == LayerType::Input)
+            {
+                // Connections coming from an InputLayer keep their data type here; they are updated
+                // when the conversion layer is inserted after the InputLayer.
+                continue;
+            }
+
+            TensorInfo connectionInfo = inputIt->GetConnection()->GetTensorInfo();
+            if (connectionInfo.GetDataType() == DataType::Float32)
+            {
+                connectionInfo.SetDataType(DataType::Float16);
+                inputIt->GetConnection()->SetTensorInfo(connectionInfo);
+            }
+        }
+
+        for (auto outputIt = layer.BeginOutputSlots(); outputIt != layer.EndOutputSlots(); ++outputIt)
+        {
+            TensorInfo outputInfo = outputIt->GetTensorInfo();
+            if (outputInfo.GetDataType() == DataType::Float32)
+            {
+                outputInfo.SetDataType(DataType::Float16);
+                outputIt->SetTensorInfo(outputInfo);
+            }
+        }
+    }
+};
+
+using Fp32NetworkToFp16Converter = OptimizeForType<Layer, ConvertFp32NetworkToFp16Impl>; // Presumably run on every layer type - confirm.
+
+} // namespace optimizations
+} // namespace armnn
diff --git a/src/armnn/optimizations/MovePermuteUp.hpp b/src/armnn/optimizations/MovePermuteUp.hpp
index 8c59986762..a8e18f5add 100644
--- a/src/armnn/optimizations/MovePermuteUp.hpp
+++ b/src/armnn/optimizations/MovePermuteUp.hpp
@@ -31,24 +31,24 @@ public:
auto permute = boost::polymorphic_downcast<PermuteLayer*>(&connection.GetOwningLayer());
const PermutationVector& perm = permute->GetPermutation();
- // Insert an equivalent permute before every input of the base layer.
+ // Inserts an equivalent permute before every input of the base layer.
for (auto baseInput = base.BeginInputSlots(); baseInput != base.EndInputSlots(); ++baseInput)
{
- // Insert new permute layer.
+ // Inserts a new permute layer.
const std::string name = std::string("moved_up-") + permute->GetName();
PermuteLayer& permLayer = *graph.InsertNewLayer<PermuteLayer>(*baseInput, perm, name.c_str());
- // Set output tensor info for the new layer.
+ // Sets output tensor info for the new layer.
OutputSlot& parentOutput = *permLayer.GetInputSlot(0).GetConnectedOutputSlot();
const TensorInfo permOutInfo = armnnUtils::Permuted(parentOutput.GetTensorInfo(), perm);
permLayer.GetOutputHandler().SetTensorInfo(permOutInfo);
}
- // Set permuted output tensor info
+ // Sets permuted output tensor info
const TensorInfo& childOutInfo = permute->GetOutputHandler().GetTensorInfo();
base.GetOutputHandler().SetTensorInfo(childOutInfo);
- // Bypass permute. It will be removed as it's left unconnected.
+ // Bypasses permute. It will be removed as it's left unconnected.
permute->GetOutputSlot().MoveAllConnections(base.GetOutputSlot());
}
}
diff --git a/src/armnn/optimizations/Optimization.hpp b/src/armnn/optimizations/Optimization.hpp
index f81071891b..ee4f91d842 100644
--- a/src/armnn/optimizations/Optimization.hpp
+++ b/src/armnn/optimizations/Optimization.hpp
@@ -13,9 +13,10 @@ namespace armnn
class Optimization
{
public:
+ Optimization() = default;
+ virtual ~Optimization() = default;
virtual void Run(Graph& graph, Layer& base) const = 0;
protected:
- ~Optimization() = default;
};
// Wrappers
@@ -44,7 +45,7 @@ protected:
~OptimizeForTypeImpl() = default;
};
-/// Specialization that calls Wrapped::Run() for any layer type
+/// Specialization that calls Wrapped::Run() for any layer type.
template <typename Wrapped>
class OptimizeForTypeImpl<Layer, Wrapped> : public armnn::Optimization, public Wrapped
{
@@ -90,7 +91,7 @@ public:
}
}
- // Remove unconnected children
+ // Removes unconnected children.
for (unsigned int i = 0; i < output->GetNumConnections();)
{
Layer* child = &output->GetConnection(i)->GetOwningLayer();
diff --git a/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
index 9a926a57a4..935186d32e 100644
--- a/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
+++ b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
@@ -31,19 +31,19 @@ public:
if (inInfo.GetShape() != outInfo.GetShape())
{
- // Insert equivalent reshape before base layer
+ // Inserts equivalent reshape before base layer.
const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName();
const ReshapeDescriptor descriptor{outInfo.GetShape()};
auto& newReshape = *graph.InsertNewLayer<ReshapeLayer>(base.GetInputSlot(0), descriptor, name.c_str());
- // Set tensor info for new layer
+ // Sets tensor info for new layer.
newReshape.GetOutputHandler().SetTensorInfo(outInfo);
- // Reconnect base with original parent
+ // Reconnects base with original parent.
newReshape.GetOutputSlot().MoveAllConnections(*parentOut);
- // Parent is now the new layer
+ // Parent is now the new layer.
parentOut = &newReshape.GetOutputSlot();
}
- // Move connections in child output to parent layer.
+ // Moves connections in child output to parent layer.
// Child layer will be removed as it's left unconnected.
// Base layer will be removed if left unconnected.
child.GetOutputSlot().MoveAllConnections(*parentOut);
diff --git a/src/armnn/optimizations/OptimizeInverseConversions.hpp b/src/armnn/optimizations/OptimizeInverseConversions.hpp
new file mode 100644
index 0000000000..5089d63f2f
--- /dev/null
+++ b/src/armnn/optimizations/OptimizeInverseConversions.hpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "Optimization.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+/// Implementation shared by the OptimizeInverseConversions optimizations: bypasses a pair of data type
+/// conversion layers that undo each other.
+class OptimizeInverseConversionsImpl
+{
+public:
+    /// Invoked for each connection that links two inverse data type conversion layers, that is
+    /// Fp16ToFp32 feeding Fp32ToFp16, or the other way round.
+    /// The @p graph argument is not used.
+    void Run(Graph& graph, InputSlot& connection) const
+    {
+        Layer& secondConversion = connection.GetOwningLayer();
+        Layer& firstConversion = connection.GetConnectedOutputSlot()->GetOwningLayer();
+
+        // The check stays inside the macro so that no local goes unused when asserts are compiled out.
+        BOOST_ASSERT((firstConversion.GetType() == LayerType::ConvertFp16ToFp32 &&
+                      secondConversion.GetType() == LayerType::ConvertFp32ToFp16) ||
+                     (firstConversion.GetType() == LayerType::ConvertFp32ToFp16 &&
+                      secondConversion.GetType() == LayerType::ConvertFp16ToFp32));
+
+        // Everything consuming the second conversion is moved onto the output slot feeding the first
+        // conversion, which bypasses both conversion layers.
+        auto& originalSource = *firstConversion.GetInputSlot(0).GetConnectedOutputSlot();
+        secondConversion.GetOutputSlot().MoveAllConnections(originalSource);
+    }
+
+protected:
+    OptimizeInverseConversionsImpl() = default;
+    ~OptimizeInverseConversionsImpl() = default;
+};
+
+using OptimizeInverseConversionsFp16 = // Fp16ToFp32 paired with Fp32ToFp16 (presumably base then child - confirm).
+ OptimizeForConnection<ConvertFp16ToFp32Layer, ConvertFp32ToFp16Layer, OptimizeInverseConversionsImpl>;
+using OptimizeInverseConversionsFp32 = // Fp32ToFp16 paired with Fp16ToFp32 (presumably base then child - confirm).
+ OptimizeForConnection<ConvertFp32ToFp16Layer, ConvertFp16ToFp32Layer, OptimizeInverseConversionsImpl>;
+
+} // namespace optimizations
+} // namespace armnn
diff --git a/src/armnn/optimizations/PermuteAsReshape.hpp b/src/armnn/optimizations/PermuteAsReshape.hpp
index a8e4c2df5e..736cd5dc98 100644
--- a/src/armnn/optimizations/PermuteAsReshape.hpp
+++ b/src/armnn/optimizations/PermuteAsReshape.hpp
@@ -23,7 +23,7 @@ public:
const std::string name = std::string("as_reshape-") + permute.GetName();
const ReshapeDescriptor descriptor{outInfo.GetShape()};
- // Insert so layers don't need to be re-sorted
+ // Inserts NewLayer so layers don't need to be re-sorted.
auto reshape = graph.InsertNewLayer<ReshapeLayer>(permute.GetInputSlot(0), descriptor, name.c_str());
reshape->GetOutputHandler().SetTensorInfo(outInfo);
diff --git a/src/armnn/optimizations/SquashEqualSiblings.hpp b/src/armnn/optimizations/SquashEqualSiblings.hpp
index c5ce28e723..6e0fa78e4e 100644
--- a/src/armnn/optimizations/SquashEqualSiblings.hpp
+++ b/src/armnn/optimizations/SquashEqualSiblings.hpp
@@ -41,7 +41,7 @@ public:
{
std::swap(sibling, lowestPriorityChild);
}
- // Bypass sibling. It will be removed as it's left unconnected.
+ // Bypasses sibling. It will be removed as it's left unconnected.
auto siblingOut = sibling->BeginOutputSlots();
for (auto lowestPriorityChildOut = lowestPriorityChild->BeginOutputSlots();
lowestPriorityChildOut != lowestPriorityChild->EndOutputSlots(); ++lowestPriorityChildOut)