diff options
Diffstat (limited to 'src/armnn/optimizations/FoldPadIntoLayer2d.hpp')
-rw-r--r-- | src/armnn/optimizations/FoldPadIntoLayer2d.hpp | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp new file mode 100644 index 0000000000..637f2b36d3 --- /dev/null +++ b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp @@ -0,0 +1,204 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Optimization.hpp" + +#include <QuantizeHelper.hpp> + +#include <armnn/utility/PolymorphicDowncast.hpp> +#include <armnnUtils/DataLayoutIndexed.hpp> + +namespace armnn +{ +namespace optimizations +{ +namespace pad_fold +{ +inline float GetZeroElement(const TensorInfo& tensorInfo) +{ + return static_cast<float>(tensorInfo.IsQuantized() ? tensorInfo.GetQuantizationOffset() : 0); +} + +inline float GetLowestElement(const TensorInfo& tensorInfo) +{ + constexpr float negativeInfinity = -std::numeric_limits<float>::infinity(); + const float scale = tensorInfo.GetQuantizationScale(); + const int32_t offset = tensorInfo.GetQuantizationOffset(); + + switch (tensorInfo.GetDataType()) + { + case DataType::Float16: + return armnnUtils::SelectiveQuantize<armnn::Half>(negativeInfinity, scale, offset); + case DataType::Float32: + return armnnUtils::SelectiveQuantize<float>(negativeInfinity, scale, offset); + case DataType::QAsymmU8: + return armnnUtils::SelectiveQuantize<uint8_t>(negativeInfinity, scale, offset); + case DataType::QSymmS16: + return armnnUtils::SelectiveQuantize<int16_t>(negativeInfinity, scale, offset); + case DataType::QSymmS8: + // Fall-through + case DataType::QAsymmS8: + return armnnUtils::SelectiveQuantize<int8_t>(negativeInfinity, scale, offset); + case DataType::BFloat16: + return armnnUtils::SelectiveQuantize<armnn::BFloat16>(negativeInfinity, scale, offset); + default: + { + ARMNN_ASSERT_MSG(false, "Unsupported DataType"); + return NAN; + } + } +} + +inline bool IsNeutralElement(const Convolution2dDescriptor&, const TensorInfo& tensorInfo, const float tensorValue) +{ + return tensorValue == GetZeroElement(tensorInfo); +} + +inline bool IsNeutralElement( + const Pooling2dDescriptor& descriptor, const TensorInfo& tensorInfo, const float tensorValue) +{ + return (descriptor.m_PoolType == PoolingAlgorithm::Max) + ? tensorValue <= GetLowestElement(tensorInfo) + : tensorValue == GetZeroElement(tensorInfo); +} + +template <typename Descriptor> +bool TryFoldPadIntoLayer2d( + const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo) +{ + armnnUtils::DataLayoutIndexed layout = armnnUtils::DataLayoutIndexed(layerDescriptor.m_DataLayout); + constexpr unsigned int batchIndex = 0; + + constexpr auto noPad = std::make_pair(0U, 0U); + + if ((!IsNeutralElement(layerDescriptor, tensorInfo, padDescriptor.m_PadValue)) || + (padDescriptor.m_PadList[batchIndex] != noPad) || (padDescriptor.m_PadList[layout.GetChannelsIndex()] != noPad)) + { + return false; + } + + const auto& padList = padDescriptor.m_PadList; + + // In Convolution2dDescriptor/Pooling2dDescriptor, padLeft and padRight are defined as paddings + // on width dimension whereas padTop and padBottom - paddings on height dimension, so updating + // these according to data layout + layerDescriptor.m_PadLeft += padList[layout.GetWidthIndex()].first; + layerDescriptor.m_PadRight += padList[layout.GetWidthIndex()].second; + layerDescriptor.m_PadTop += padList[layout.GetHeightIndex()].first; + layerDescriptor.m_PadBottom += padList[layout.GetHeightIndex()].second; + + return true; +} + +inline bool TryFoldPadIntoLayer2d( + const PadDescriptor& padDescriptor, Pooling2dDescriptor& poolDescriptor, const TensorInfo& tensorInfo) +{ + const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight, + poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom); + bool poolHasPadding = false; + if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U)) + { + poolHasPadding = true; + } + + // We cannot fold Average or L2 pooling if there's is already padding and that padding method is Exclude. + if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max) // PoolingAlgorithm::Average or PoolingAlgorithm::L2 + { + if ((poolHasPadding) && (poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude)) + { + return false; + } + } + poolDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue; + + return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo); +} + +template <typename Layer2dT> +Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection) +{ + PadLayer& padLayer = *PolymorphicDowncast<PadLayer*>(&connection.GetConnectedOutputSlot()->GetOwningLayer()); + Layer2dT& layer2d = *PolymorphicDowncast<Layer2dT*>(&connection.GetOwningLayer()); + + const PadDescriptor& padDescriptor = padLayer.GetParameters(); + auto newLayer2dDescriptor = layer2d.GetParameters(); + + if (!TryFoldPadIntoLayer2d(padDescriptor, newLayer2dDescriptor, padLayer.GetOutputSlot().GetTensorInfo())) + { + return nullptr; + } + + // Save original parent output slot of the pad layer + OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot(); + + // Insert new layer2d layer between the pad layer an its parent layer. + const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName(); + auto& newLayer2d = *graph.InsertNewLayer<Layer2dT>(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str()); + + // Reconnect the pad layer with its original parent. + newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot); + + // Moves connections in old layer2d layer output to new layer. + // Old layer2d layer will be removed as it's left unconnected. + // Pad layer will be removed if left unconnected. + layer2d.GetOutputSlot().MoveAllConnections(newLayer2d.GetOutputSlot()); + + return &newLayer2d; +} + +class FoldPadIntoConvolution2dImpl +{ +public: + void Run(Graph& graph, InputSlot& connection) const + { + const auto newConv2dLayer = FoldPadIntoLayer2dImpl<Convolution2dLayer>(graph, connection); + + if (newConv2dLayer != nullptr) + { + const auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection.GetOwningLayer()); + // Copy weights and bias to the new convolution layer + ARMNN_ASSERT_MSG(conv2dLayer->m_Weight != nullptr, + "FoldPadIntoConvolution2d: Weights data should not be null."); + newConv2dLayer->m_Weight = std::move(conv2dLayer->m_Weight); + + if (conv2dLayer->GetParameters().m_BiasEnabled) + { + ARMNN_ASSERT_MSG(conv2dLayer->m_Bias != nullptr, + "FoldPadIntoConvolution2d: Bias data should not be null if bias is enabled."); + newConv2dLayer->m_Bias = std::move(conv2dLayer->m_Bias); + } + } + } + +protected: + FoldPadIntoConvolution2dImpl() = default; + ~FoldPadIntoConvolution2dImpl() = default; +}; + +class FoldPadIntoPooling2dImpl +{ +public: + void Run(Graph& graph, InputSlot& connection) const + { + FoldPadIntoLayer2dImpl<Pooling2dLayer>(graph, connection); + } + +protected: + FoldPadIntoPooling2dImpl() = default; + ~FoldPadIntoPooling2dImpl() = default; +}; +} // namespace pad_fold + +using FoldPadIntoConvolution2d = + OptimizeForExclusiveConnection<PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl>; +using FoldPadIntoPooling2d = + OptimizeForExclusiveConnection<PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl>; + +} // namespace optimizations +} // namespace armnn + + |