diff options
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r--  src/armnn/optimizations/All.hpp                      |  3
-rw-r--r--  src/armnn/optimizations/ConvertConstants.hpp         | 98
-rw-r--r--  src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp | 80
-rw-r--r--  src/armnn/optimizations/MovePermuteUp.hpp            | 10
-rw-r--r--  src/armnn/optimizations/Optimization.hpp             |  7
-rw-r--r--  src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp | 10
-rw-r--r--  src/armnn/optimizations/OptimizeInverseConversions.hpp  | 44
-rw-r--r--  src/armnn/optimizations/PermuteAsReshape.hpp         |  2
-rw-r--r--  src/armnn/optimizations/SquashEqualSiblings.hpp      |  2
9 files changed, 241 insertions, 15 deletions
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 70f78d44af..0603d44d31 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -4,8 +4,11 @@ // #pragma once +#include "ConvertConstants.hpp" #include "OptimizeInversePermutes.hpp" #include "PermuteAsReshape.hpp" #include "OptimizeConsecutiveReshapes.hpp" #include "SquashEqualSiblings.hpp" #include "MovePermuteUp.hpp" +#include "OptimizeInverseConversions.hpp" +#include "ConvertFp32NetworkToFp16.hpp" diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp new file mode 100644 index 0000000000..d2dd650665 --- /dev/null +++ b/src/armnn/optimizations/ConvertConstants.hpp @@ -0,0 +1,98 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "Optimization.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +namespace armnn +{ +namespace optimizations +{ + +struct Float16ToFloat32 +{ + static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) + { + const TensorInfo& info = handle->GetTensorInfo(); + + if (info.GetDataType() == DataType::Float16) + { + std::vector<float> newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertFloat16To32(handle->GetTensor<Half>(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::Float32); + ConstTensor newInput(newInfo, newValues); + handle.reset(new ScopedCpuTensorHandle(newInput)); + } + } +}; + +struct Float32ToFloat16 +{ + static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) + { + const TensorInfo& info = handle->GetTensorInfo(); + + if (info.GetDataType() == DataType::Float32) + { + std::vector<Half> newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertFloat32To16(handle->GetTensor<float>(), + 
info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::Float16); + ConstTensor newInput(newInfo, newValues); + handle.reset(new ScopedCpuTensorHandle(newInput)); + } + } +}; + +template<typename Converter, typename Predicate> +class ConvertConstants : public Optimization +{ +public: + ConvertConstants() = default; + ConvertConstants(const ConvertConstants&) = default; + virtual ~ConvertConstants() = default; + + void Run(Graph& graph, Layer& layer) const override + { + if (Predicate::Test(layer)) + { + layer.OperateOnConstantTensors(Converter::Func); + } + } +protected: +}; + +struct IsFloat32Layer +{ + static bool Test(const Layer& layer) + { + return layer.GetDataType() == DataType::Float32; + } +}; + +struct IsFloat16Layer +{ + static bool Test(const Layer& layer) + { + return layer.GetDataType() == DataType::Float16; + } +}; + +using ConvertConstantsHalfToFloat = ConvertConstants<Float16ToFloat32, IsFloat32Layer>; +using ConvertConstantsFloatToHalf = ConvertConstants<Float32ToFloat16, IsFloat16Layer>; + +} //namespace optimizations +} //namespace armnn diff --git a/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp new file mode 100644 index 0000000000..a4df05c18a --- /dev/null +++ b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp @@ -0,0 +1,80 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include "Optimization.hpp" +#include "NetworkUtils.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class ConvertFp32NetworkToFp16Impl +{ +public: + + void Run(Graph& graph, Layer& layer) const + { + if(layer.GetType() == LayerType::Input) + { + // if the outputs of this layer are DataType::Float32 + // add a ConvertFloat32ToFloat16 layer after each of the outputs + if (layer.GetDataType() == DataType::Float32) + { + InsertConvertFp32ToFp16LayersAfter(graph, layer); + } + } + else if (layer.GetType() == LayerType::Output) + { + // if the inputs of this layer are DataType::Float32 + // add a ConvertFloat16ToFloat32 layer before each of the inputs + if (layer.GetDataType() == DataType::Float32) + { + InsertConvertFp16ToFp32LayersBefore(graph, layer); + } + } + else if (layer.GetType() != LayerType::ConvertFp32ToFp16 && layer.GetType() != LayerType::ConvertFp16ToFp32) + { + // if the inputs/outputs of this layer are DataType::Float32 + // change the data type for all inputs and outputs to DataType::Float16 + for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input) + { + // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection + // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer + Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer(); + if (base.GetType() != LayerType::Input) + { + TensorInfo convertInfo = input->GetConnection()->GetTensorInfo(); + if (convertInfo.GetDataType() == DataType::Float32) + { + convertInfo.SetDataType(DataType::Float16); + input->GetConnection()->SetTensorInfo(convertInfo); + } + } + } + + // change outputs to DataType::Float16 + for (auto&& output = layer.BeginOutputSlots(); output != layer.EndOutputSlots(); ++output) + { + TensorInfo convertInfo = output->GetTensorInfo(); + if (convertInfo.GetDataType() == DataType::Float32) + { + convertInfo.SetDataType(DataType::Float16); + 
output->SetTensorInfo(convertInfo); + } + } + } + } + +protected: + ConvertFp32NetworkToFp16Impl() = default; + ~ConvertFp32NetworkToFp16Impl() = default; +}; + +using Fp32NetworkToFp16Converter = OptimizeForType<Layer, ConvertFp32NetworkToFp16Impl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/optimizations/MovePermuteUp.hpp b/src/armnn/optimizations/MovePermuteUp.hpp index 8c59986762..a8e18f5add 100644 --- a/src/armnn/optimizations/MovePermuteUp.hpp +++ b/src/armnn/optimizations/MovePermuteUp.hpp @@ -31,24 +31,24 @@ public: auto permute = boost::polymorphic_downcast<PermuteLayer*>(&connection.GetOwningLayer()); const PermutationVector& perm = permute->GetPermutation(); - // Insert an equivalent permute before every input of the base layer. + // Inserts an equivalent permute before every input of the base layer. for (auto baseInput = base.BeginInputSlots(); baseInput != base.EndInputSlots(); ++baseInput) { - // Insert new permute layer. + // Inserts a new permute layer. const std::string name = std::string("moved_up-") + permute->GetName(); PermuteLayer& permLayer = *graph.InsertNewLayer<PermuteLayer>(*baseInput, perm, name.c_str()); - // Set output tensor info for the new layer. + // Sets output tensor info for the new layer. OutputSlot& parentOutput = *permLayer.GetInputSlot(0).GetConnectedOutputSlot(); const TensorInfo permOutInfo = armnnUtils::Permuted(parentOutput.GetTensorInfo(), perm); permLayer.GetOutputHandler().SetTensorInfo(permOutInfo); } - // Set permuted output tensor info + // Sets permuted output tensor info const TensorInfo& childOutInfo = permute->GetOutputHandler().GetTensorInfo(); base.GetOutputHandler().SetTensorInfo(childOutInfo); - // Bypass permute. It will be removed as it's left unconnected. + // Bypasses permute. It will be removed as it's left unconnected. 
permute->GetOutputSlot().MoveAllConnections(base.GetOutputSlot()); } } diff --git a/src/armnn/optimizations/Optimization.hpp b/src/armnn/optimizations/Optimization.hpp index f81071891b..ee4f91d842 100644 --- a/src/armnn/optimizations/Optimization.hpp +++ b/src/armnn/optimizations/Optimization.hpp @@ -13,9 +13,10 @@ namespace armnn class Optimization { public: + Optimization() = default; + virtual ~Optimization() = default; virtual void Run(Graph& graph, Layer& base) const = 0; protected: - ~Optimization() = default; }; // Wrappers @@ -44,7 +45,7 @@ protected: ~OptimizeForTypeImpl() = default; }; -/// Specialization that calls Wrapped::Run() for any layer type +/// Specialization that calls Wrapped::Run() for any layer type. template <typename Wrapped> class OptimizeForTypeImpl<Layer, Wrapped> : public armnn::Optimization, public Wrapped { @@ -90,7 +91,7 @@ public: } } - // Remove unconnected children + // Removes unconnected children. for (unsigned int i = 0; i < output->GetNumConnections();) { Layer* child = &output->GetConnection(i)->GetOwningLayer(); diff --git a/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp index 9a926a57a4..935186d32e 100644 --- a/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp +++ b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp @@ -31,19 +31,19 @@ public: if (inInfo.GetShape() != outInfo.GetShape()) { - // Insert equivalent reshape before base layer + // Inserts equivalent reshape before base layer. const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName(); const ReshapeDescriptor descriptor{outInfo.GetShape()}; auto& newReshape = *graph.InsertNewLayer<ReshapeLayer>(base.GetInputSlot(0), descriptor, name.c_str()); - // Set tensor info for new layer + // Sets tensor info for new layer. 
newReshape.GetOutputHandler().SetTensorInfo(outInfo); - // Reconnect base with original parent + // Reconnects base with original parent. newReshape.GetOutputSlot().MoveAllConnections(*parentOut); - // Parent is now the new layer + // Parent is now the new layer. parentOut = &newReshape.GetOutputSlot(); } - // Move connections in child output to parent layer. + // Moves connections in child output to parent layer. // Child layer will be removed as it's left unconnected. // Base layer will be removed if left unconnected. child.GetOutputSlot().MoveAllConnections(*parentOut); diff --git a/src/armnn/optimizations/OptimizeInverseConversions.hpp b/src/armnn/optimizations/OptimizeInverseConversions.hpp new file mode 100644 index 0000000000..5089d63f2f --- /dev/null +++ b/src/armnn/optimizations/OptimizeInverseConversions.hpp @@ -0,0 +1,44 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class OptimizeInverseConversionsImpl +{ +public: + /// Run for every connection between two inverse data type conversion layers, i.e. + /// Fp16ToFp32 followed by Fp32ToFp16 or vice-versa. 
+ void Run(Graph& graph, InputSlot& connection) const + { + Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); + Layer& child = connection.GetOwningLayer(); + + BOOST_ASSERT((base.GetType() == LayerType::ConvertFp16ToFp32 && + child.GetType() == LayerType::ConvertFp32ToFp16) || + (base.GetType() == LayerType::ConvertFp32ToFp16 && + child.GetType() == LayerType::ConvertFp16ToFp32)); + + // Bypass both conversion layers + child.GetOutputSlot().MoveAllConnections(*base.GetInputSlot(0).GetConnectedOutputSlot()); + } + +protected: + OptimizeInverseConversionsImpl() = default; + ~OptimizeInverseConversionsImpl() = default; +}; + +using OptimizeInverseConversionsFp16 = + OptimizeForConnection<ConvertFp16ToFp32Layer, ConvertFp32ToFp16Layer, OptimizeInverseConversionsImpl>; +using OptimizeInverseConversionsFp32 = + OptimizeForConnection<ConvertFp32ToFp16Layer, ConvertFp16ToFp32Layer, OptimizeInverseConversionsImpl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/optimizations/PermuteAsReshape.hpp b/src/armnn/optimizations/PermuteAsReshape.hpp index a8e4c2df5e..736cd5dc98 100644 --- a/src/armnn/optimizations/PermuteAsReshape.hpp +++ b/src/armnn/optimizations/PermuteAsReshape.hpp @@ -23,7 +23,7 @@ public: const std::string name = std::string("as_reshape-") + permute.GetName(); const ReshapeDescriptor descriptor{outInfo.GetShape()}; - // Insert so layers don't need to be re-sorted + // Inserts NewLayer so layers don't need to be re-sorted. 
auto reshape = graph.InsertNewLayer<ReshapeLayer>(permute.GetInputSlot(0), descriptor, name.c_str()); reshape->GetOutputHandler().SetTensorInfo(outInfo); diff --git a/src/armnn/optimizations/SquashEqualSiblings.hpp b/src/armnn/optimizations/SquashEqualSiblings.hpp index c5ce28e723..6e0fa78e4e 100644 --- a/src/armnn/optimizations/SquashEqualSiblings.hpp +++ b/src/armnn/optimizations/SquashEqualSiblings.hpp @@ -41,7 +41,7 @@ public: { std::swap(sibling, lowestPriorityChild); } - // Bypass sibling. It will be removed as it's left unconnected. + // Bypasses sibling. It will be removed as it's left unconnected. auto siblingOut = sibling->BeginOutputSlots(); for (auto lowestPriorityChildOut = lowestPriorityChild->BeginOutputSlots(); lowestPriorityChildOut != lowestPriorityChild->EndOutputSlots(); ++lowestPriorityChildOut) |