author     Cathal Corbett <cathal.corbett@arm.com>   2022-07-22 16:03:36 +0100
committer  Cathal Corbett <cathal.corbett@arm.com>   2022-07-29 09:50:20 +0000
commit     b7e5f53607829c865a11adf8763969399f04515c
tree       6842e15904037d73426d814d5751945b3d9c2376
parent     4d3298e21e5bc07bb71cf421d514034b23851bab
GitHub #667: Neon fold padding into average pool 2D quantization bug fix.
* Originated from a GitHub issue: https://github.com/ARM-software/armnn/issues/667
* Initially, Arm NN accepts the Pooling2d operation because the pool2d itself carries no padding.
* The Neon failure occurs when a preceding Pad layer is folded into an average pool 2D by the folding optimization.
* Here we prevent the folding optimization from happening for this special case and add it back in as a backend specific optimization.

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Ia0fd90c3a6b4b9d29c81106f154617d2e893e26b
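
For context, the failing pattern is a Pad layer feeding a quantized (QAsymmU8), NHWC average Pooling2d. A minimal sketch of that network built with the public INetwork API (shapes and quantization parameters are illustrative and mirror the new unit test added below; BuildPadPoolNetwork is a hypothetical helper name):

    #include <armnn/INetwork.hpp>

    using namespace armnn;

    // Illustrative only: builds input -> pad -> pool2d -> output on QAsymmU8 NHWC tensors.
    INetworkPtr BuildPadPoolNetwork()
    {
        INetworkPtr network = INetwork::Create();

        const unsigned int inputShape[]  = {1, 2, 2, 3};
        const unsigned int paddedShape[] = {1, 4, 4, 3};
        TensorInfo inputInfo(4, inputShape, DataType::QAsymmU8, 1.0f, 0);
        TensorInfo paddedInfo(4, paddedShape, DataType::QAsymmU8, 1.0f, 0);
        TensorInfo outputInfo(4, inputShape, DataType::QAsymmU8, 1.0f, 0);

        IConnectableLayer* input = network->AddInputLayer(0, "input");
        input->GetOutputSlot(0).SetTensorInfo(inputInfo);

        // Pad height and width by one element on each side (NHWC layout).
        PadDescriptor padDescriptor({{0, 0}, {1, 1}, {1, 1}, {0, 0}});
        IConnectableLayer* pad = network->AddPadLayer(padDescriptor, "pad");
        pad->GetOutputSlot(0).SetTensorInfo(paddedInfo);

        Pooling2dDescriptor poolDescriptor;
        poolDescriptor.m_PoolType   = PoolingAlgorithm::Average;
        poolDescriptor.m_PoolWidth  = 3;
        poolDescriptor.m_PoolHeight = 3;
        poolDescriptor.m_StrideX    = 1;
        poolDescriptor.m_StrideY    = 1;
        poolDescriptor.m_DataLayout = DataLayout::NHWC;
        IConnectableLayer* pool = network->AddPooling2dLayer(poolDescriptor, "pool2d");
        pool->GetOutputSlot(0).SetTensorInfo(outputInfo);

        IConnectableLayer* output = network->AddOutputLayer(0, "output");

        input->GetOutputSlot(0).Connect(pad->GetInputSlot(0));
        pad->GetOutputSlot(0).Connect(pool->GetInputSlot(0));
        pool->GetOutputSlot(0).Connect(output->GetInputSlot(0));
        return network;
    }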
-rw-r--r--  CMakeLists.txt                                                              |   1
-rw-r--r--  src/armnn/optimizations/FoldPadIntoLayer2d.hpp                              |  43
-rw-r--r--  src/armnn/test/optimizations/FoldPadIntoQuantizedAveragePooling2DTests.cpp | 114
-rw-r--r--  src/armnn/test/optimizations/FoldPadTests.cpp                               |  64
-rw-r--r--  src/backends/aclCommon/ArmComputeSubgraphUtils.hpp                          |  46
-rw-r--r--  src/backends/backendsCommon/SubgraphUtils.hpp                               |  99
-rw-r--r--  src/backends/cl/ClBackend.cpp                                               |  27
-rw-r--r--  src/backends/reference/RefBackend.cpp                                       |  59
-rw-r--r--  src/backends/reference/RefBackend.hpp                                       |   3
9 files changed, 391 insertions(+), 65 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1d8ebe2952..7061c603a2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -544,6 +544,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp
src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp
src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp
+ src/armnn/test/optimizations/FoldPadIntoQuantizedAveragePooling2DTests.cpp
src/armnn/test/optimizations/FoldPadTests.cpp
src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
diff --git a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
index eb6bc90afd..4c4bd80d41 100644
--- a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
+++ b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -73,6 +73,17 @@ inline bool IsNeutralElement(
: tensorValue == GetZeroElement(tensorInfo);
}
+inline bool IsPooling2dPadded(const Pooling2dDescriptor& poolDescriptor)
+{
+ const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight,
+ poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom);
+ if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U))
+ {
+ return true;
+ }
+ return false;
+}
+
template <typename Descriptor>
bool TryFoldPadIntoLayer2d(
const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo)
@@ -101,25 +112,29 @@ bool TryFoldPadIntoLayer2d(
return true;
}
-inline bool TryFoldPadIntoLayer2d(
- const PadDescriptor& padDescriptor, Pooling2dDescriptor& poolDescriptor, const TensorInfo& tensorInfo)
+inline bool TryFoldPadIntoLayer2d(const PadDescriptor& padDescriptor,
+ Pooling2dDescriptor& poolDescriptor,
+ const TensorInfo& tensorInfo,
+ bool isBackendOptimization = false)
{
- const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight,
- poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom);
- bool poolHasPadding = false;
- if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U))
+ // Cannot fold Average or L2 pooling if padding exists and the padding method is Exclude.
+ if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max &&
+ IsPooling2dPadded(poolDescriptor) &&
+ poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude)
{
- poolHasPadding = true;
+ return false;
}
- // We cannot fold Average or L2 pooling if there's is already padding and that padding method is Exclude.
- if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max) // PoolingAlgorithm::Average or PoolingAlgorithm::L2
+ // Cannot fold Average pooling if data type is quantized and layout is NHWC in Neon backend.
+ // Therefore, this specific case will become a backend specific optimization.
+ if (!isBackendOptimization &&
+ tensorInfo.IsQuantized() &&
+ poolDescriptor.m_PoolType == PoolingAlgorithm::Average &&
+ poolDescriptor.m_DataLayout == DataLayout::NHWC)
{
- if ((poolHasPadding) && (poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude))
- {
- return false;
- }
+ return false;
}
+
poolDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue;
return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo);
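
To make the new gate concrete, a hedged sketch of how the overload above is expected to behave (descriptor values are illustrative; on success the function mutates the pooling descriptor, switching it to PaddingMethod::IgnoreValue and absorbing the pad sizes, so a copy is taken for the second call):

    #include <armnn/Descriptors.hpp>
    #include <armnn/Tensor.hpp>
    #include <optimizations/FoldPadIntoLayer2d.hpp>

    using namespace armnn;
    using namespace armnn::optimizations::pad_fold;

    const unsigned int paddedShape[] = {1, 4, 4, 3};
    TensorInfo paddedInfo(4, paddedShape, DataType::QAsymmU8, 1.0f, 0);

    PadDescriptor pad({{0, 0}, {1, 1}, {1, 1}, {0, 0}});

    Pooling2dDescriptor pool;
    pool.m_PoolType   = PoolingAlgorithm::Average;
    pool.m_PoolWidth  = 3;
    pool.m_PoolHeight = 3;
    pool.m_StrideX    = 1;
    pool.m_StrideY    = 1;
    pool.m_DataLayout = DataLayout::NHWC;

    Pooling2dDescriptor poolForBackend = pool; // separate copy, since folding mutates it

    // Generic optimizer path (isBackendOptimization defaults to false): the quantized
    // NHWC average pool is rejected, leaving the Pad layer in the graph.
    bool generic = TryFoldPadIntoLayer2d(pad, pool, paddedInfo);                  // expected: false

    // Backend-specific path (CpuRef/GpuAcc opt in by passing true): folding is allowed.
    bool backend = TryFoldPadIntoLayer2d(pad, poolForBackend, paddedInfo, true);  // expected: true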
diff --git a/src/armnn/test/optimizations/FoldPadIntoQuantizedAveragePooling2DTests.cpp b/src/armnn/test/optimizations/FoldPadIntoQuantizedAveragePooling2DTests.cpp
new file mode 100644
index 0000000000..32627c62f7
--- /dev/null
+++ b/src/armnn/test/optimizations/FoldPadIntoQuantizedAveragePooling2DTests.cpp
@@ -0,0 +1,114 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <GraphUtils.hpp>
+#include <TestUtils.hpp>
+
+#include <armnn/INetwork.hpp>
+
+#include <doctest/doctest.h>
+
+using namespace armnn;
+
+namespace
+{
+#if defined(ARMNNREF_ENABLED)||defined(ARMCOMPUTECL_ENABLED)
+void FoldPadIntoQuantizedAvgPoolTest(Compute backendId)
+{
+ // Create a network
+ INetworkPtr network = INetwork::Create();
+
+ const unsigned int inputShape[] = {1, 2, 2, 3};
+ const unsigned int paddedShape[] = {1, 4, 4, 3};
+ const unsigned int outputShape[] = {1, 2, 2, 3};
+
+ TensorInfo inputInfo(4, inputShape, DataType::QAsymmU8, 1.0f, 0.0f);
+ TensorInfo paddedInfo(4, paddedShape, DataType::QAsymmU8, 1.0f, 0.0f);
+ TensorInfo outputInfo(4, outputShape, DataType::QAsymmU8, 1.0f, 0.0f);
+
+ IConnectableLayer* input = network->AddInputLayer(0, "input");
+ input->GetOutputSlot(0).SetTensorInfo(inputInfo);
+
+ PadDescriptor padDescriptor({{0, 0},
+ {1, 1},
+ {1, 1},
+ {0, 0}});
+
+ IConnectableLayer* padLayer = network->AddPadLayer(padDescriptor, "pad");
+ padLayer->GetOutputSlot(0).SetTensorInfo(paddedInfo);
+
+ Pooling2dDescriptor pooling2dDescriptor;
+ pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average;
+ pooling2dDescriptor.m_PoolWidth = 3;
+ pooling2dDescriptor.m_PoolHeight = 3;
+ pooling2dDescriptor.m_StrideX = 1;
+ pooling2dDescriptor.m_StrideY = 1;
+ pooling2dDescriptor.m_DataLayout = DataLayout::NHWC;
+
+ IConnectableLayer* pool2dLayer = network->AddPooling2dLayer(pooling2dDescriptor, "pool2d");
+ pool2dLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ IConnectableLayer* output = network->AddOutputLayer(0, "output");
+
+ // Connect up layers - input -> pad -> pool2d -> output
+ input->GetOutputSlot(0).Connect(padLayer->GetInputSlot(0));
+ padLayer->GetOutputSlot(0).Connect(pool2dLayer->GetInputSlot(0));
+ pool2dLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // Create ArmNN runtime
+ IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions());
+
+ // Optimise ArmNN network
+ IOptimizedNetworkPtr optNet = Optimize(*network, {backendId}, run->GetDeviceSpec());
+
+ auto checkPadFoldedIntoPool2d = [&](const Layer* const layer) {
+ if (!IsLayerOfType<Pooling2dLayer>(layer) || (layer->GetNameStr() != "folded-pad-into-pool2d"))
+ {
+ return false;
+ }
+
+ const auto pool2dLayer = static_cast<const Pooling2dLayer*>(layer);
+ const Pooling2dDescriptor pool2dLayerParams = pool2dLayer->GetParameters();
+
+ Pooling2dDescriptor pool2dLayerParamsNoPad = pool2dLayerParams;
+ pool2dLayerParamsNoPad.m_PadLeft = 0;
+ pool2dLayerParamsNoPad.m_PadRight = 0;
+ pool2dLayerParamsNoPad.m_PadTop = 0;
+ pool2dLayerParamsNoPad.m_PadBottom = 0;
+ // If we fold then PaddingMethod will be set to Ignore. The original will be Exclude.
+ pool2dLayerParamsNoPad.m_PaddingMethod = PaddingMethod::Exclude;
+
+ return (pool2dLayerParamsNoPad == pooling2dDescriptor) && (pool2dLayerParams.m_PadLeft == 1) &&
+ (pool2dLayerParams.m_PadRight == 1) && (pool2dLayerParams.m_PadTop == 1) &&
+ (pool2dLayerParams.m_PadBottom == 1) && (pool2dLayerParams.m_PaddingMethod == PaddingMethod::IgnoreValue);
+ };
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ checkPadFoldedIntoPool2d,
+ &IsLayerOfType<OutputLayer>));
+}
+#endif
+}
+
+#if defined(ARMNNREF_ENABLED)
+TEST_SUITE("Optimizer_FoldPadIntoQuantizedAvgPoolCpuRef")
+{
+TEST_CASE("FoldPadIntoQuantizedAvgPoolCpuRefTest")
+{
+ FoldPadIntoQuantizedAvgPoolTest(Compute::CpuRef);
+}
+}
+#endif
+
+#if defined(ARMCOMPUTECL_ENABLED)
+TEST_SUITE("Optimizer_FoldPadIntoQuantizedAvgPoolGpuAcc")
+{
+TEST_CASE("FoldPadIntoQuantizedAvgPoolGpuAccTest")
+{
+ FoldPadIntoQuantizedAvgPoolTest(Compute::GpuAcc);
+}
+}
+#endif
diff --git a/src/armnn/test/optimizations/FoldPadTests.cpp b/src/armnn/test/optimizations/FoldPadTests.cpp
index 4d7defcabe..b2672ea584 100644
--- a/src/armnn/test/optimizations/FoldPadTests.cpp
+++ b/src/armnn/test/optimizations/FoldPadTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -474,6 +474,68 @@ TEST_CASE("FoldPadLayerIntoPooling2dLayer_MaxPoolingLayerWithLargePadValueShould
&IsLayerOfType<OutputLayer>));
}
+TEST_CASE("FoldPadLayerIntoPooling2dLayer_QuantizedAveragePoolingShouldNotBeFolded")
+{
+ Graph graph;
+ const unsigned int inputShape[] = {1, 2, 2, 3};
+ const unsigned int paddedShape[] = {1, 4, 4, 3};
+ const unsigned int outputShape[] = {1, 2, 2, 3};
+
+ TensorInfo inputInfo(4, inputShape, DataType::QAsymmU8);
+ TensorInfo paddedInfo(4, paddedShape, DataType::QAsymmU8);
+ TensorInfo outputInfo(4, outputShape, DataType::QAsymmU8);
+
+ Layer* input = graph.AddLayer<InputLayer>(0, "input");
+ input->GetOutputSlot().SetTensorInfo(inputInfo);
+
+ PadDescriptor padDescriptor({{0, 0},
+ {1, 1},
+ {1, 1},
+ {0, 0}});
+
+ PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
+ padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);
+
+ Pooling2dDescriptor pooling2dDescriptor;
+ pooling2dDescriptor.m_PoolType = PoolingAlgorithm::Average;
+ pooling2dDescriptor.m_PoolWidth = 3;
+ pooling2dDescriptor.m_PoolHeight = 3;
+ pooling2dDescriptor.m_StrideX = 1;
+ pooling2dDescriptor.m_StrideY = 1;
+ pooling2dDescriptor.m_DataLayout = DataLayout::NHWC;
+
+ Pooling2dLayer* pool2dLayer = graph.AddLayer<Pooling2dLayer>(pooling2dDescriptor, "pool2d");
+ pool2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);
+
+ Layer* output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connect up layers - input -> pad -> pool2d -> output
+ input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
+ padLayer->GetOutputSlot().Connect(pool2dLayer->GetInputSlot(0));
+ pool2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ auto checkSimplePool2d = [&](const Layer* const layer) {
+ const auto pool2dLayer = static_cast<const Pooling2dLayer*>(layer);
+ return IsLayerOfType<Pooling2dLayer>(layer) && (layer->GetNameStr() == "pool2d") &&
+ (pool2dLayer->GetParameters() == pooling2dDescriptor);
+ };
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<PadLayer>,
+ checkSimplePool2d,
+ &IsLayerOfType<OutputLayer>));
+
+ armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoPooling2d()));
+
+ // The optimization should not have modified the graph.
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<PadLayer>,
+ checkSimplePool2d,
+ &IsLayerOfType<OutputLayer>));
+}
+
#if defined(ARMNNREF_ENABLED)
TEST_CASE("FoldPadLayerIntoPooling2dLayer_ExecuteInferenceWithAndWithoutOptimization")
{
diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
index a26442cb86..766bf2d2cc 100644
--- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -9,6 +9,7 @@
#include <armnn/utility/Assert.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
+#include <backendsCommon/SubgraphUtils.hpp>
namespace armnn
{
@@ -20,36 +21,6 @@ namespace
// this helper only works if all layers where the inputs connect to are not selected
//
-SubgraphView::IInputSlots CreateIInputsFrom(const std::vector<armnn::IConnectableLayer*>& layers)
-{
- SubgraphView::IInputSlots result;
- for (auto&& layer : layers)
- {
- for (unsigned int i = 0 ; i < layer->GetNumInputSlots(); ++i)
- {
- result.push_back(&(layer->GetInputSlot(i)));
- }
- }
- return result;
-}
-
-//
-// this helper only works if all layers where the outputs connect to are not selected
-//
-
-SubgraphView::IOutputSlots CreateIOutputsFrom(const std::vector<armnn::IConnectableLayer*>& layers)
-{
- SubgraphView::IOutputSlots result;
- for (auto &&layer: layers)
- {
- for (unsigned int i = 0; i < layer->GetNumOutputSlots(); ++i)
- {
- result.push_back(&(layer->GetOutputSlot(i)));
- }
- }
- return result;
-}
-
bool checkDataTypeInputandOutput(const Layer& layer)
{
auto inputInfo = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -79,19 +50,6 @@ bool checkDataTypeInputandOutput(const Layer& layer)
} // namespace
-inline void ReportUntouchedLayers(OptimizationViews& optimizationViews, std::map<LayerGuid, Layer*> untouched)
-{
- std::vector<Layer*> untouchedVector;
- for (const auto& pair : untouched)
- {
- Layer* layer = pair.second;
- SubgraphView subgraphView({layer},
- CreateIInputsFrom({layer}),
- CreateIOutputsFrom({layer}));
- optimizationViews.AddUntouchedSubgraph(std::move(subgraphView));
- }
-}
-
template<typename LayerType>
LayerType* FuseLayer(OptimizationViews& optimizationViews,
LayerType* baseLayer,
diff --git a/src/backends/backendsCommon/SubgraphUtils.hpp b/src/backends/backendsCommon/SubgraphUtils.hpp
new file mode 100644
index 0000000000..bd3d698a98
--- /dev/null
+++ b/src/backends/backendsCommon/SubgraphUtils.hpp
@@ -0,0 +1,99 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <optimizations/FoldPadIntoLayer2d.hpp>
+
+namespace armnn
+{
+
+namespace
+{
+
+//
+// this helper only works if all layers where the inputs connect to are not selected
+//
+
+SubgraphView::IInputSlots CreateIInputsFrom(const std::vector<armnn::IConnectableLayer*>& layers)
+{
+ SubgraphView::IInputSlots result;
+ for (auto&& layer : layers)
+ {
+ for (unsigned int i = 0 ; i < layer->GetNumInputSlots(); ++i)
+ {
+ result.push_back(&(layer->GetInputSlot(i)));
+ }
+ }
+ return result;
+}
+
+//
+// this helper only works if all layers where the outputs connect to are not selected
+//
+
+SubgraphView::IOutputSlots CreateIOutputsFrom(const std::vector<armnn::IConnectableLayer*>& layers)
+{
+ SubgraphView::IOutputSlots result;
+ for (auto &&layer: layers)
+ {
+ for (unsigned int i = 0; i < layer->GetNumOutputSlots(); ++i)
+ {
+ result.push_back(&(layer->GetOutputSlot(i)));
+ }
+ }
+ return result;
+}
+
+}
+
+inline void ReportUntouchedLayers(OptimizationViews& optimizationViews, std::map<LayerGuid, Layer*> untouched)
+{
+ std::vector<Layer*> untouchedVector;
+ for (const auto& pair : untouched)
+ {
+ Layer* layer = pair.second;
+ SubgraphView subgraphView({layer},
+ CreateIInputsFrom({layer}),
+ CreateIOutputsFrom({layer}));
+ optimizationViews.AddUntouchedSubgraph(std::move(subgraphView));
+ }
+}
+
+template<typename LayerType>
+LayerType* FoldPadLayer(OptimizationViews& optimizationViews,
+ LayerType* baseLayer,
+ LayerType* replacementLayer,
+ PadLayer* padLayer)
+{
+ SubgraphView substitutionSubgraph({padLayer, baseLayer},
+ CreateIInputsFrom({padLayer}),
+ CreateIOutputsFrom({baseLayer}));
+ SubgraphView replacementSubgraph(replacementLayer);
+
+ optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
+
+ return replacementLayer;
+}
+
+template<typename LayerType>
+LayerType* FoldPadIntoAveragePool2d(OptimizationViews& optimizationViews,
+ Pooling2dLayer* baseLayer,
+ Pooling2dDescriptor& poolDescriptor,
+ PadLayer* padLayer)
+{
+ IConnectableLayer* replacement =
+ optimizationViews.GetINetwork()->AddPooling2dLayer(poolDescriptor, "folded-pad-into-pool2d");
+ LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
+
+ FoldPadLayer(optimizationViews,
+ baseLayer,
+ replacementLayer,
+ padLayer);
+
+ return replacementLayer;
+}
+
+} // namespace armnn
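
The two backend diffs that follow consume these helpers with an identical pattern; condensed here for orientation, assuming we are inside OptimizeSubgraphView, base is the current layer, and untouched is the GUID-to-layer map built earlier:

    if (base.GetType() == LayerType::Pooling2d)
    {
        Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
        Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();
        Layer& producer = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();

        if (producer.GetType() == LayerType::Pad)
        {
            PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(&producer);
            // Fold only when the Pad has a single consumer and the opted-in check
            // (isBackendOptimization = true) accepts the pad/pool combination.
            if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&
                optimizations::pad_fold::TryFoldPadIntoLayer2d(padLayer->GetParameters(),
                                                               poolingDescriptor,
                                                               padLayer->GetOutputSlot().GetTensorInfo(),
                                                               true))
            {
                FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
                                                         poolingDescriptor, padLayer);
                untouched.erase(baseLayer->GetGuid());
                untouched.erase(padLayer->GetGuid());
            }
        }
    }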
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 1fe53de62a..d2e8fbfe32 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -566,6 +566,31 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
untouched.erase(baseLayer->GetGuid());
}
}
+
+ // Special case to fuse padding into average pooling 2d for quantized datatype.
+ // Required to be done as a backend specific optimization as Neon does not support this special case.
+ if (base.GetType() == LayerType::Pooling2d)
+ {
+ Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
+ Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();
+
+ if (baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer().GetType() == LayerType::Pad)
+ {
+ PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
+ &baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer());
+ if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&
+ optimizations::pad_fold::TryFoldPadIntoLayer2d(padLayer->GetParameters(),
+ poolingDescriptor,
+ padLayer->GetOutputSlot().GetTensorInfo(),
+ true))
+ {
+ FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
+ poolingDescriptor, padLayer);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(padLayer->GetGuid());
+ }
+ }
+ }
}
if (optimizationViews.GetSubstitutions().empty())
diff --git a/src/backends/reference/RefBackend.cpp b/src/backends/reference/RefBackend.cpp
index a33a7756a0..8c8879c8be 100644
--- a/src/backends/reference/RefBackend.cpp
+++ b/src/backends/reference/RefBackend.cpp
@@ -14,6 +14,7 @@
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/DefaultAllocator.hpp>
+#include <backendsCommon/SubgraphUtils.hpp>
#include <Optimizer.hpp>
@@ -70,11 +71,61 @@ IBackendInternal::ILayerSupportSharedPtr RefBackend::GetLayerSupport() const
return layerSupport;
}
-OptimizationViews RefBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
+OptimizationViews RefBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
+ const ModelOptions& modelOptions) const
{
- OptimizationViews optimizationViews;
-
- optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ OptimizationViews optimizationViews(modelOptions);
+
+ auto it = subgraph.endIConnectable();
+ std::map<LayerGuid, Layer*> untouched;
+
+ while (it != subgraph.beginIConnectable())
+ {
+ --it;
+ Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+ untouched.insert({base.GetGuid(), &base});
+ }
+
+ it = subgraph.endIConnectable();
+ while (it != subgraph.beginIConnectable())
+ {
+ --it;
+ Layer& base = *(PolymorphicDowncast<Layer*>(*it));
+
+ // Special case to fuse padding into average pooling 2d for quantized datatype.
+ // Required to be done as a backend specific optimization as Neon does not support this special case.
+ if (base.GetType() == LayerType::Pooling2d)
+ {
+ Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
+ Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();
+
+ if (baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer().GetType() == LayerType::Pad)
+ {
+ PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
+ &baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer());
+ if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&
+ optimizations::pad_fold::TryFoldPadIntoLayer2d(padLayer->GetParameters(),
+ poolingDescriptor,
+ padLayer->GetOutputSlot().GetTensorInfo(),
+ true))
+ {
+ FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
+ poolingDescriptor, padLayer);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(padLayer->GetGuid());
+ }
+ }
+ }
+ }
+
+ if (optimizationViews.GetSubstitutions().empty())
+ {
+ optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ }
+ else
+ {
+ ReportUntouchedLayers(optimizationViews, untouched);
+ }
return optimizationViews;
}
diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp
index 9828d09f51..ecbe4d5ba9 100644
--- a/src/backends/reference/RefBackend.hpp
+++ b/src/backends/reference/RefBackend.hpp
@@ -50,7 +50,8 @@ public:
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
- OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph) const override;
+ OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph,
+ const ModelOptions& modelOptions) const override;
std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
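
With both backend hooks in place, a sketch of exercising the fold end to end (this mirrors the new unit test; BuildPadPoolNetwork is the illustrative helper sketched after the commit message, and backend availability checks are elided):

    // Optimize for CpuRef: the backend-specific pass substitutes pad + pool2d with a
    // single Pooling2d named "folded-pad-into-pool2d" using PaddingMethod::IgnoreValue.
    // GpuAcc behaves the same; on CpuAcc (Neon) this quantized NHWC average-pool case
    // is deliberately left unfolded, which is the point of the fix.
    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
    INetworkPtr network = BuildPadPoolNetwork(); // hypothetical helper from the earlier sketch
    IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, runtime->GetDeviceSpec());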