//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Optimization.hpp"

#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnnUtils/QuantizeHelper.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

namespace armnn
{
namespace optimizations
{
namespace pad_fold
{
inline float GetZeroElement(const TensorInfo& tensorInfo)
{
    return static_cast<float>(tensorInfo.IsQuantized() ? tensorInfo.GetQuantizationOffset() : 0);
}

inline float GetLowestElement(const TensorInfo& tensorInfo)
{
    constexpr float negativeInfinity = -std::numeric_limits<float>::infinity();
    const float scale = tensorInfo.GetQuantizationScale();
    const int32_t offset = tensorInfo.GetQuantizationOffset();

    switch (tensorInfo.GetDataType())
    {
        case DataType::Float16:
            return armnnUtils::SelectiveQuantize<armnn::Half>(negativeInfinity, scale, offset);
        case DataType::Float32:
            return armnnUtils::SelectiveQuantize<float>(negativeInfinity, scale, offset);
        case DataType::QAsymmU8:
            return armnnUtils::SelectiveQuantize<uint8_t>(negativeInfinity, scale, offset);
        case DataType::QSymmS16:
            return armnnUtils::SelectiveQuantize<int16_t>(negativeInfinity, scale, offset);
        case DataType::QSymmS8:
            // Fall-through
        case DataType::QAsymmS8:
            return armnnUtils::SelectiveQuantize<int8_t>(negativeInfinity, scale, offset);
        case DataType::BFloat16:
            return armnnUtils::SelectiveQuantize<armnn::BFloat16>(negativeInfinity, scale, offset);
        default:
        {
            ARMNN_ASSERT_MSG(false, "Unsupported DataType");
            return NAN;
        }
    }
}

inline bool IsNeutralElement(const Convolution2dDescriptor&, const TensorInfo& tensorInfo, const float tensorValue)
{
    return tensorValue == GetZeroElement(tensorInfo);
}

inline bool IsNeutralElement(const DepthwiseConvolution2dDescriptor&, const TensorInfo& tensorInfo,
                             const float tensorValue)
{
    return tensorValue == GetZeroElement(tensorInfo);
}

inline bool IsNeutralElement(
    const Pooling2dDescriptor& descriptor, const TensorInfo& tensorInfo, const float tensorValue)
{
    return (descriptor.m_PoolType == PoolingAlgorithm::Max)
        ? tensorValue <= GetLowestElement(tensorInfo)
        : tensorValue == GetZeroElement(tensorInfo);
}
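// Note (illustrative, not part of the optimization): for Max pooling the pad value is
// neutral if it cannot exceed any real input, i.e. it is at or below the lowest
// representable element. For a QAsymmU8 tensor, for instance, quantizing -infinity
// saturates to the type's minimum, so GetLowestElement returns that minimum as a float.
// For convolutions and Average/L2 pooling, the neutral value is the zero point.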
template <typename Descriptor>
bool TryFoldPadIntoLayer2d(
    const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo)
{
    armnnUtils::DataLayoutIndexed layout = armnnUtils::DataLayoutIndexed(layerDescriptor.m_DataLayout);
    constexpr unsigned int batchIndex = 0;

    constexpr auto noPad = std::make_pair(0U, 0U);

    if ((!IsNeutralElement(layerDescriptor, tensorInfo, padDescriptor.m_PadValue)) ||
        (padDescriptor.m_PadList[batchIndex] != noPad) ||
        (padDescriptor.m_PadList[layout.GetChannelsIndex()] != noPad))
    {
        return false;
    }

    const auto& padList = padDescriptor.m_PadList;

    // In Convolution2dDescriptor/Pooling2dDescriptor, padLeft and padRight are defined as paddings
    // on the width dimension, whereas padTop and padBottom are paddings on the height dimension,
    // so update these according to the data layout.
    layerDescriptor.m_PadLeft += padList[layout.GetWidthIndex()].first;
    layerDescriptor.m_PadRight += padList[layout.GetWidthIndex()].second;
    layerDescriptor.m_PadTop += padList[layout.GetHeightIndex()].first;
    layerDescriptor.m_PadBottom += padList[layout.GetHeightIndex()].second;

    return true;
}

inline bool TryFoldPadIntoLayer2d(
    const PadDescriptor& padDescriptor, Pooling2dDescriptor& poolDescriptor, const TensorInfo& tensorInfo)
{
    const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight,
                                                  poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom);
    bool poolHasPadding = false;
    if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U))
    {
        poolHasPadding = true;
    }

    // We cannot fold Average or L2 pooling if there is already padding and the padding method is Exclude.
    if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max) // PoolingAlgorithm::Average or PoolingAlgorithm::L2
    {
        if ((poolHasPadding) && (poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude))
        {
            return false;
        }
    }
    poolDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue;

    // Call the generic template overload explicitly to avoid recursing into this overload.
    return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo);
}

template <typename Layer2dT>
Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection)
{
    PadLayer& padLayer = *PolymorphicDowncast<PadLayer*>(&connection.GetConnectedOutputSlot()->GetOwningLayer());
    Layer2dT& layer2d = *PolymorphicDowncast<Layer2dT*>(&connection.GetOwningLayer());

    const PadDescriptor& padDescriptor = padLayer.GetParameters();
    auto newLayer2dDescriptor = layer2d.GetParameters();

    if (!TryFoldPadIntoLayer2d(padDescriptor, newLayer2dDescriptor, padLayer.GetOutputSlot().GetTensorInfo()))
    {
        return nullptr;
    }

    // Save the original parent output slot of the pad layer.
    OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot();

    // Insert the new layer2d layer between the pad layer and its parent layer.
    const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName();
    auto& newLayer2d = *graph.InsertNewLayer<Layer2dT>(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str());

    // Reconnect the pad layer with its original parent.
    newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot);

    // Move connections from the old layer2d layer's output to the new layer.
    // The old layer2d layer will be removed as it is left unconnected.
    // The pad layer will be removed if left unconnected.
    layer2d.GetOutputSlot().MoveAllConnections(newLayer2d.GetOutputSlot());

    return &newLayer2d;
}
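// Illustrative sketch of the rewiring performed by FoldPadIntoLayer2dImpl above
// (assuming the pad layer exclusively feeds the layer2d layer):
//
//     before:  parent -> PadLayer -> layer2d -> consumers
//     after:   parent -> newLayer2d -> consumers
//
// The pad layer and the old layer2d layer are left unconnected and are removed
// when the graph is cleaned up.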
class FoldPadIntoConvolution2dImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        const auto newConv2dLayer = FoldPadIntoLayer2dImpl<Convolution2dLayer>(graph, connection);

        if (newConv2dLayer != nullptr)
        {
            const auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection.GetOwningLayer());
            // Copy the weights and bias to the new convolution layer.
            ARMNN_ASSERT_MSG(conv2dLayer->m_Weight != nullptr,
                             "FoldPadIntoConvolution2d: Weights data should not be null.");
            newConv2dLayer->m_Weight = std::move(conv2dLayer->m_Weight);

            if (conv2dLayer->GetParameters().m_BiasEnabled)
            {
                ARMNN_ASSERT_MSG(conv2dLayer->m_Bias != nullptr,
                                 "FoldPadIntoConvolution2d: Bias data should not be null if bias is enabled.");
                newConv2dLayer->m_Bias = std::move(conv2dLayer->m_Bias);
            }
        }
    }

protected:
    FoldPadIntoConvolution2dImpl() = default;
    ~FoldPadIntoConvolution2dImpl() = default;
};

class FoldPadIntoDepthwiseConvolution2dImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        const auto newConv2dLayer = FoldPadIntoLayer2dImpl<DepthwiseConvolution2dLayer>(graph, connection);

        if (newConv2dLayer != nullptr)
        {
            const auto conv2dLayer = PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&connection.GetOwningLayer());
            // Copy the weights and bias to the new depthwise convolution layer.
            ARMNN_ASSERT_MSG(conv2dLayer->m_Weight != nullptr,
                             "FoldPadIntoDepthwiseConvolution2d: Weights data should not be null.");
            newConv2dLayer->m_Weight = std::move(conv2dLayer->m_Weight);

            if (conv2dLayer->GetParameters().m_BiasEnabled)
            {
                ARMNN_ASSERT_MSG(conv2dLayer->m_Bias != nullptr,
                                 "FoldPadIntoDepthwiseConvolution2d: Bias data should not be null if bias is enabled.");
                newConv2dLayer->m_Bias = std::move(conv2dLayer->m_Bias);
            }
        }
    }

protected:
    FoldPadIntoDepthwiseConvolution2dImpl() = default;
    ~FoldPadIntoDepthwiseConvolution2dImpl() = default;
};

class FoldPadIntoPooling2dImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        FoldPadIntoLayer2dImpl<Pooling2dLayer>(graph, connection);
    }

protected:
    FoldPadIntoPooling2dImpl() = default;
    ~FoldPadIntoPooling2dImpl() = default;
};
} // namespace pad_fold

using FoldPadIntoConvolution2d =
    OptimizeForExclusiveConnection<PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl>;
using FoldPadIntoDepthwiseConvolution2d =
    OptimizeForExclusiveConnection<PadLayer,
                                   DepthwiseConvolution2dLayer,
                                   pad_fold::FoldPadIntoDepthwiseConvolution2dImpl>;
using FoldPadIntoPooling2d =
    OptimizeForExclusiveConnection<PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl>;

} // namespace optimizations
} // namespace armnn
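// Usage sketch (illustrative, not a definitive part of this header): these optimizations
// are typically wrapped in an optimizer pass over a graph, along the lines of
//
//     armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(
//         armnn::optimizations::FoldPadIntoConvolution2d(),
//         armnn::optimizations::FoldPadIntoDepthwiseConvolution2d(),
//         armnn::optimizations::FoldPadIntoPooling2d()));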