aboutsummaryrefslogtreecommitdiff
path: root/src/backends
diff options
context:
space:
mode:
authorTracy Narine <tracy.narine@arm.com>2023-09-20 14:19:07 +0100
committerTracy Narine <tracy.narine@arm.com>2023-09-28 14:25:16 +0100
commit6440ce89abb06e090d2b3cf91bafc14277072475 (patch)
treec55682891a0f01f3edbf5dad58720ded7af3fc64 /src/backends
parent9a418d850333119e219fb05addc57b56cdc60a7e (diff)
downloadarmnn-6440ce89abb06e090d2b3cf91bafc14277072475.tar.gz
IVGCVSW-7504 Create a backend specific optimization to fuse ADD+MUL+Add+(Activation) in CpuAcc
* Adding CpuAcc backend optimization to fuse add+mul+add into one layer * Tests added/enhanced * Also added optional extended parameter to Graph::Print() and throw macros that could be used in place of assert Signed-off-by: Tracy Narine <tracy.narine@arm.com> Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com> Change-Id: I5f8d094b969a130d8c2c7b4da07426313a9fea76
Diffstat (limited to 'src/backends')
-rw-r--r--src/backends/aclCommon/ArmComputeSubgraphUtils.hpp21
-rw-r--r--src/backends/backendsCommon/SubgraphUtils.hpp160
-rw-r--r--src/backends/backendsCommon/test/layerTests/AddMulAddTestImpl.hpp18
-rw-r--r--src/backends/neon/CMakeLists.txt1
-rw-r--r--src/backends/neon/NeonBackend.cpp82
-rw-r--r--src/backends/neon/NeonBackendOptimizationUtils.hpp215
-rw-r--r--src/backends/neon/test/NeonLayerTests.cpp2
7 files changed, 493 insertions, 6 deletions
diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
index 90c0fd5890..a44acb0f54 100644
--- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -356,4 +356,25 @@ void ReplaceLayers(OptimizationViews& optimizationViews,
optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
}
+//
+// Substitute a multi-layer subgraph with one new layer
+//
+// optimizationViews     - receives the substitution (original layers -> baseLayer)
+// originalLayers        - the layers being replaced; every one becomes part of the substituted subgraph
+// baseLayer             - the single layer that takes their place
+// inputLayersSlotLists  - one SlotList per entry in originalLayers, naming the input slot indices of
+//                         that layer which form the inputs of the substituted subgraph
+// outputLayersSlotLists - one SlotList per entry in originalLayers, naming the output slot indices of
+//                         that layer which form the outputs of the substituted subgraph
+//
+// The slot lists are only read, so they are taken by const reference to avoid copying a
+// vector of vectors on every call.
+//
+template<typename LayerType>
+void ReplaceMultipleLayers(OptimizationViews& optimizationViews,
+                           std::vector<IConnectableLayer*>& originalLayers,
+                           LayerType* baseLayer,
+                           const std::vector<SlotList>& inputLayersSlotLists,
+                           const std::vector<SlotList>& outputLayersSlotLists)
+{
+    // SubgraphView takes the layers as a list; the slot helpers below still read the vector,
+    // so moving the list into the view does not affect them.
+    std::list<IConnectableLayer*> originalLayerList(originalLayers.begin(), originalLayers.end());
+
+    SubgraphView substitutionSubgraph(
+        std::move(originalLayerList),
+        CreateIInputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, inputLayersSlotLists),
+        CreateIOutputsFromSlotLists<armnn::IConnectableLayer>(originalLayers, outputLayersSlotLists));
+    SubgraphView replacementSubgraph(baseLayer);
+
+    optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
+}
+
} // namespace armnn
diff --git a/src/backends/backendsCommon/SubgraphUtils.hpp b/src/backends/backendsCommon/SubgraphUtils.hpp
index 9f2cdba6ef..6a9e8f1b76 100644
--- a/src/backends/backendsCommon/SubgraphUtils.hpp
+++ b/src/backends/backendsCommon/SubgraphUtils.hpp
@@ -161,6 +161,53 @@ SubgraphView::IOutputSlots CreateIOutputsFrom(const std::vector<armnn::IConnecta
return result;
}
+// Slot indices selected on a single layer. The Create*FromSlotLists helpers expect
+// one SlotList per layer, in the same order as the layers list they are given.
+using SlotList = std::vector<int>;
+
+// Collects pointers to the input slots named by layersSlotLists[i] on layers[i].
+// The two vectors must be the same length. Slots are gathered layer by layer in
+// ascending slot-index order; an index in a SlotList that the layer does not have
+// is simply never matched.
+template<typename ILayerType>
+SubgraphView::IInputSlots CreateIInputsFromSlotLists(const std::vector<ILayerType*>& layers,
+                                                     const std::vector<SlotList>& layersSlotLists)
+{
+    ARMNN_THROW_INVALIDARG_IF_FALSE(layersSlotLists.size() == layers.size());
+
+    SubgraphView::IInputSlots selected;
+
+    for (unsigned int i = 0; i < layers.size(); ++i)
+    {
+        const SlotList& wanted = layersSlotLists[i];
+        const auto wantedEnd = wanted.end();
+        for (unsigned int slot = 0; slot < layers[i]->GetNumInputSlots(); ++slot)
+        {
+            if (std::find(wanted.begin(), wantedEnd, slot) == wantedEnd)
+            {
+                // This slot was not requested for this layer
+                continue;
+            }
+            selected.push_back(&(layers[i]->GetInputSlot(slot)));
+        }
+    }
+    return selected;
+}
+
+// Collects pointers to the output slots named by layersSlotLists[i] on layers[i].
+// The two vectors must be the same length. Slots are gathered layer by layer in
+// ascending slot-index order; an index in a SlotList that the layer does not have
+// is simply never matched.
+template<typename ILayerType>
+SubgraphView::IOutputSlots CreateIOutputsFromSlotLists(const std::vector<ILayerType*>& layers,
+                                                       const std::vector<SlotList>& layersSlotLists)
+{
+    ARMNN_THROW_INVALIDARG_IF_FALSE(layersSlotLists.size() == layers.size());
+
+    SubgraphView::IOutputSlots selected;
+    for (unsigned int i = 0; i < layers.size(); ++i)
+    {
+        const SlotList& wanted = layersSlotLists[i];
+        const auto wantedEnd = wanted.end();
+        for (unsigned int slot = 0; slot < layers[i]->GetNumOutputSlots(); ++slot)
+        {
+            if (std::find(wanted.begin(), wantedEnd, slot) == wantedEnd)
+            {
+                // This slot was not requested for this layer
+                continue;
+            }
+            selected.push_back(&(layers[i]->GetOutputSlot(slot)));
+        }
+    }
+    return selected;
+}
}
inline bool IsNCHW(armnn::Layer& layer)
@@ -308,4 +355,117 @@ LayerType* FoldPadIntoAveragePool2d(OptimizationViews& optimizationViews,
return replacementLayer;
}
+//
+// Layer sequence detection such as add + mul + add ( + optional activation )
+//
+
+// True when the layer's type is exactly the requested LayerType.
+inline bool IsSequenceLayerType(Layer& layer, LayerType type)
+{
+    const bool isRequestedType = (type == layer.GetType());
+    return isRequestedType;
+}
+
+// True when the layer is an ElementwiseBinary layer configured for the requested operation.
+inline bool IsSequenceLayerType(Layer& layer, BinaryOperation type)
+{
+    if (layer.GetType() != LayerType::ElementwiseBinary)
+    {
+        return false;
+    }
+
+    // Only downcast once the layer type has been confirmed
+    ElementwiseBinaryLayer* binaryLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&layer);
+    return binaryLayer->GetParameters().m_Operation == type;
+}
+
+// Detect a layer sequence and activation if specified. The activation must be at the end of the sequence.
+//
+// currentLayer must match `third`; the match then walks backwards along input slot 0 looking for
+// `second` and then `first`. The data type seen on input slot 0 of each of the three layers must be
+// the same, and each of those input slots must be connected.
+//
+// layerList receives the matched layers: [0] = first, [1] = second, [2] = third and, only when a
+// valid activation follows, [3] = activation. Entries are written as matching progresses, so the
+// array may be partially filled when false is returned. Entry [3] is never cleared here, so the
+// caller should initialise the array to nullptr.
+//
+// When handleValidActivates is true, the consumer on connection 0 of currentLayer's output slot 0
+// is inspected:
+//   * an activation whose function is in validActivates joins the sequence,
+//   * an activation with any other function rejects the whole match,
+//   * any other layer leaves the three-layer match intact.
+// Only that first connection is examined. When handleValidActivates is false the consumer is
+// not looked at.
+//
+// Returns true when the sequence was matched.
+template<typename TYPE>
+bool IsLayerSequence(Layer& currentLayer,
+                     TYPE first,
+                     TYPE second,
+                     TYPE third,
+                     Layer* layerList[4],
+                     bool handleValidActivates,
+                     const std::vector<ActivationFunction>& validActivates)
+{
+    // Layer feeding input slot 0, or nullptr when that slot does not exist or is not connected.
+    // The connected output slot is checked before use: taking the address of the owning layer
+    // can never yield nullptr, so the null test has to happen on the slot itself.
+    auto PreviousLayer = [](Layer& layer) -> Layer*
+    {
+        if (layer.GetNumInputSlots() == 0)
+        {
+            return nullptr;
+        }
+        auto* producerSlot = layer.GetInputSlot(0).GetConnectedOutputSlot();
+        return producerSlot ? &producerSlot->GetOwningLayer() : nullptr;
+    };
+
+    // Layer consuming connection 0 of output slot 0, or nullptr when there is no such connection.
+    auto NextLayer = [](Layer& layer) -> Layer*
+    {
+        if (layer.GetNumOutputSlots() == 0)
+        {
+            return nullptr;
+        }
+        auto& outputSlot = layer.GetOutputSlot(0);
+        if (outputSlot.GetNumConnections() == 0)
+        {
+            return nullptr;
+        }
+        auto* consumerSlot = outputSlot.GetConnection(0);
+        return consumerSlot ? &consumerSlot->GetOwningLayer() : nullptr;
+    };
+
+    // Only call this on a layer whose input slot 0 is known to be connected.
+    auto LayerIncomingConnectionDataType = [](Layer& layer)
+    {
+        return layer.GetInputSlot(0).GetTensorInfo().GetDataType();
+    };
+
+    // Match in reverse so there is only 1 connection to check
+    if (!IsSequenceLayerType(currentLayer, third))
+    {
+        return false;
+    }
+
+    // Save third layer
+    layerList[2] = &currentLayer;
+
+    // Check the layers that precede this one for the requested grouping
+    Layer* secondLayer = PreviousLayer(currentLayer);
+    if (!secondLayer || !IsSequenceLayerType(*secondLayer, second))
+    {
+        return false;
+    }
+
+    // DataType of the third layer; every layer in the sequence has to agree with it
+    const DataType dataType = LayerIncomingConnectionDataType(currentLayer);
+
+    Layer* firstLayer = PreviousLayer(*secondLayer);
+    if (!firstLayer || (dataType != LayerIncomingConnectionDataType(*secondLayer)))
+    {
+        return false;
+    }
+    layerList[1] = secondLayer;
+
+    if (!IsSequenceLayerType(*firstLayer, first))
+    {
+        return false;
+    }
+    if (!PreviousLayer(*firstLayer) || (dataType != LayerIncomingConnectionDataType(*firstLayer)))
+    {
+        return false;
+    }
+    layerList[0] = firstLayer;
+
+    // Detected the first 3 layers if we get to this point so now
+    // check to see if we have a valid activation. If there is no activation
+    // then the sequence still matches.
+    if (!handleValidActivates)
+    {
+        return true;
+    }
+
+    Layer* nextLayer = NextLayer(currentLayer);
+    if (!nextLayer)
+    {
+        // Nothing consumes the third layer, so there is nothing to inspect. This is reported
+        // as "no match", which is what the pre-existing null branch did.
+        return false;
+    }
+
+    if (!IsSequenceLayerType(*nextLayer, LayerType::Activation))
+    {
+        // Next layer is not an activation so sequence still matches
+        return true;
+    }
+
+    // This layer is an activation, so it must be a valid type for the sequence
+    const ActivationFunction activationFunction =
+        PolymorphicDowncast<ActivationLayer*>(nextLayer)->GetParameters().m_Function;
+    const bool isValidActivation =
+        std::find(validActivates.cbegin(), validActivates.cend(), activationFunction) != validActivates.cend();
+    if (!isValidActivation)
+    {
+        return false;
+    }
+
+    layerList[3] = nextLayer;
+    return true;
+}
+
} // namespace armnn
diff --git a/src/backends/backendsCommon/test/layerTests/AddMulAddTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/AddMulAddTestImpl.hpp
index 9dece9be3b..39d2219954 100644
--- a/src/backends/backendsCommon/test/layerTests/AddMulAddTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/AddMulAddTestImpl.hpp
@@ -39,18 +39,18 @@ std::vector<LayerTestResult<T,4>> AddMulAddTest(armnn::IWorkloadFactory& workloa
if (IsQuantizedType<T>())
{
input0TensorInfo.SetQuantizationScale(0.25f);
- input0TensorInfo.SetQuantizationOffset(128);
+ input0TensorInfo.SetQuantizationOffset(10);
input1TensorInfo.SetQuantizationScale(0.25f);
- input1TensorInfo.SetQuantizationOffset(128);
+ input1TensorInfo.SetQuantizationOffset(11);
mulInput1TensorInfo.SetQuantizationScale(0.25f);
- mulInput1TensorInfo.SetQuantizationOffset(128);
+ mulInput1TensorInfo.SetQuantizationOffset(12);
addInput1TensorInfo.SetQuantizationScale(0.25f);
- addInput1TensorInfo.SetQuantizationOffset(128);
+ addInput1TensorInfo.SetQuantizationOffset(13);
output0TensorInfo.SetQuantizationScale(0.5f);
- output0TensorInfo.SetQuantizationOffset(120);
+ output0TensorInfo.SetQuantizationOffset(14);
output1TensorInfo.SetQuantizationScale(0.5f);
- output1TensorInfo.SetQuantizationOffset(120);
+ output1TensorInfo.SetQuantizationOffset(15);
}
std::vector<float> input0Data
@@ -140,6 +140,12 @@ std::vector<LayerTestResult<T,4>> AddMulAddTest(armnn::IWorkloadFactory& workloa
}
AddOutputToWorkload(fusedQueueDescriptor, info, output1TensorInfo, output1Handle.get());
+ if (addOutput)
+ {
+ AddOutputToWorkload(fusedQueueDescriptor, info, output0TensorInfo, output0Handle.get());
+ }
+ AddOutputToWorkload(fusedQueueDescriptor, info, output1TensorInfo, output1Handle.get());
+
std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(LayerType::Fused,
fusedQueueDescriptor,
info);
diff --git a/src/backends/neon/CMakeLists.txt b/src/backends/neon/CMakeLists.txt
index 1c077731c4..8ceeef386b 100644
--- a/src/backends/neon/CMakeLists.txt
+++ b/src/backends/neon/CMakeLists.txt
@@ -8,6 +8,7 @@ if(ARMCOMPUTENEON)
NeonBackend.cpp
NeonBackend.hpp
NeonBackendId.hpp
+ NeonBackendOptimizationUtils.hpp
NeonBackendModelContext.hpp
NeonBackendModelContext.cpp
NeonInterceptorScheduler.hpp
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index b5719db007..7311098631 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -9,6 +9,7 @@
#include "NeonWorkloadFactory.hpp"
#include "NeonLayerSupport.hpp"
#include "NeonTensorHandleFactory.hpp"
+#include "NeonBackendOptimizationUtils.hpp"
#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>
@@ -28,6 +29,7 @@
#include <neon/workloads/NeonDepthwiseConvolutionWorkload.hpp>
#include <neon/workloads/NeonDivisionWorkload.hpp>
#include <neon/workloads/NeonFullyConnectedWorkload.hpp>
+#include <neon/workloads/NeonFusedWorkload.hpp>
#include <neon/workloads/NeonMultiplicationWorkload.hpp>
#include <neon/workloads/NeonReduceWorkload.hpp>
#include <neon/workloads/NeonSubtractionWorkload.hpp>
@@ -524,6 +526,86 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
}
RemoveReshapeLayer(baseLayer, untouched, optimizationViews);
}
+
+ // Replace Add/Mul/Add where possible
+ Layer* layerList[4] = {nullptr, nullptr, nullptr, nullptr};
+ const std::vector<ActivationFunction> validActivates = { ActivationFunction::ReLu,
+ ActivationFunction::BoundedReLu };
+ if (IsLayerSequence<BinaryOperation>(base,
+ BinaryOperation::Add, BinaryOperation::Mul, BinaryOperation::Add,
+ layerList,
+ true, // handleValidActivates
+ validActivates))
+ {
+ bool fuseReLu = false;
+ unsigned int numInputs = 0;
+ unsigned int numOutputs = 0;
+ std::vector<TensorInfo> inputInfos;
+ std::vector<TensorInfo> outputInfos;
+ const ActivationDescriptor* activationDescriptor = nullptr;
+
+ if (BuildAddMulAddTensorInfoLists<Layer>(layerList,
+ numInputs,
+ numOutputs,
+ inputInfos,
+ outputInfos,
+ activationDescriptor,
+ fuseReLu))
+ {
+ // Create the new Add/Mul/Add layer and set the Relu activation function
+ FusedDescriptor fusedDescriptor(numInputs, numOutputs, FusedKernelType::AddMulAdd);
+ arm_compute::Status status = NeonFusedWorkloadValidate({inputInfos.begin(), inputInfos.end()},
+ {outputInfos.begin(), outputInfos.end()},
+ fusedDescriptor,
+ activationDescriptor);
+ if (status)
+ {
+ std::string fusedName;
+ GetFusedName(layerList, fusedName);
+
+ IConnectableLayer* addMulAddLayer =
+ optimizationViews.GetINetwork()->AddFusedLayer(fusedDescriptor, fusedName.c_str());
+
+ if (fuseReLu)
+ {
+ FusedLayer* addMulAddFusedLayer = PolymorphicDowncast<FusedLayer*>(addMulAddLayer);
+ addMulAddFusedLayer->SetAdditionalInfoForObject(
+ std::make_shared<ActivationDescriptor>(*activationDescriptor));
+ }
+
+ // Update the graph
+ std::vector<IConnectableLayer*> originalLayers;
+ for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
+ {
+ if (layerList[layerIdx])
+ {
+ originalLayers.push_back(layerList[layerIdx]);
+ }
+ }
+
+ std::vector<SlotList> inputLayersSlotLists, outputLayersSlotLists;
+ BuildAddMulAddSlotLists<SlotList>(fuseReLu,
+ outputInfos.size() > 1,
+ inputLayersSlotLists,
+ outputLayersSlotLists);
+
+ ReplaceMultipleLayers<FusedLayer>(optimizationViews,
+ originalLayers,
+ PolymorphicDowncast<FusedLayer*>(addMulAddLayer),
+ inputLayersSlotLists,
+ outputLayersSlotLists);
+
+ // Remove unused layers
+ for (unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
+ {
+ if (layerList[layerIdx])
+ {
+ untouched.erase(layerList[layerIdx]->GetGuid());
+ }
+ }
+ }
+ }
+ }
}
if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
diff --git a/src/backends/neon/NeonBackendOptimizationUtils.hpp b/src/backends/neon/NeonBackendOptimizationUtils.hpp
new file mode 100644
index 0000000000..3a8bf46599
--- /dev/null
+++ b/src/backends/neon/NeonBackendOptimizationUtils.hpp
@@ -0,0 +1,215 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <aclCommon/ArmComputeSubgraphUtils.hpp>
+
+namespace armnn
+{
+
+// Changes shapes of the form [1, 1, ..., W] to [ W ]
+//
+// Returns true and writes a copy of `in` with the collapsed shape to `out` when every
+// dimension except the last is 1 (a one-dimensional shape qualifies trivially).
+// Returns false and leaves `out` untouched otherwise, including when `in` has no
+// dimensions at all.
+inline bool CollapseLeadingUnitDimensions(const TensorInfo& in, TensorInfo& out)
+{
+    const unsigned int numDimensions = in.GetNumDimensions();
+    if (numDimensions == 0)
+    {
+        // Without this guard (numDimensions - 1) wraps around as an unsigned value and the
+        // shape would be indexed far out of range.
+        return false;
+    }
+
+    const auto& shape = in.GetShape();
+    const unsigned int lastDimension = numDimensions - 1;
+    for (unsigned int i = 0; i < lastDimension; ++i)
+    {
+        if (shape[i] != 1)
+        {
+            return false;
+        }
+    }
+
+    // Read the width before assigning to `out`, in case `in` and `out` are the same object
+    const unsigned int w = shape[lastDimension];
+    out = in;
+    out.SetShape({w});
+
+    return true;
+}
+
+//
+// Build slot and tensor info lists for Add/Mul/Add replacement
+//
+// Appends one slot list per layer of the sequence to each vector, in the order
+// Add, Mul, Add and, when handleReLu is set, Relu. Neither vector is cleared first.
+//
+// Inputs:  both slots of the first Add, slot 1 of the Mul, slot 1 of the second Add,
+//          and nothing on the Relu.
+// Outputs: slot 0 of the first Add only when multipleOutputs is set, nothing on the Mul,
+//          and slot 0 of whichever layer ends the sequence (the Relu when handleReLu is set,
+//          otherwise the second Add).
+//
+template<typename SlotListType>
+void BuildAddMulAddSlotLists(bool handleReLu,
+                             bool multipleOutputs,
+                             std::vector<SlotListType>& inputLayersSlotLists,
+                             std::vector<SlotListType>& outputLayersSlotLists)
+{
+    // Input slots of the three arithmetic layers
+    inputLayersSlotLists.push_back({0, 1}); // Add
+    inputLayersSlotLists.push_back({1});    // Mul
+    inputLayersSlotLists.push_back({1});    // Add
+
+    // Output slots of the first Add and the Mul
+    if (multipleOutputs)
+    {
+        outputLayersSlotLists.push_back({0}); // Add
+    }
+    else
+    {
+        outputLayersSlotLists.push_back({});  // Add
+    }
+    outputLayersSlotLists.push_back({});      // Mul
+
+    if (!handleReLu)
+    {
+        // The second Add ends the sequence
+        outputLayersSlotLists.push_back({0}); // Add
+        return;
+    }
+
+    // The Relu ends the sequence
+    inputLayersSlotLists.push_back({});       // Relu
+    outputLayersSlotLists.push_back({});      // Add
+    outputLayersSlotLists.push_back({0});     // Relu
+}
+
+// Builds the fused layer name: "fused" followed by "-<layer name>" for each entry of
+// layerList, stopping at the first null entry. Any previous content of fusedName is replaced.
+inline void GetFusedName(Layer *layerList[4], std::string& fusedName)
+{
+    fusedName = "fused";
+
+    unsigned int idx = 0;
+    while (idx < 4 && layerList[idx])
+    {
+        fusedName += "-";
+        fusedName += layerList[idx]->GetNameStr();
+        ++idx;
+    }
+}
+
+// Collects the tensor infos and activation details needed to replace an Add/Mul/Add
+// (+ optional activation) sequence with a single fused layer.
+//
+// layerList            - [0] Add, [1] Mul, [2] Add, [3] activation or nullptr. Entries 0..2 must be
+//                        non-null and of the stated operation, and a non-null [3] must be a ReLu or
+//                        BoundedReLu activation; each of these is checked with
+//                        ARMNN_THROW_INVALIDARG_IF_FALSE.
+// numInputs/numOutputs - reset to 0, then incremented once per tensor info appended.
+// inputInfos           - appended with the infos of the inputs that come from outside the
+//                        sequence. Not cleared first.
+// outputInfos          - appended with one info per connection that leaves the sequence.
+//                        Not cleared first.
+// activationDescriptor - pointed at the parameters of layerList[3] when it is present,
+//                        otherwise left as passed in.
+// fuseReLu             - set to true when layerList[3] is present, false otherwise.
+//
+// Returns false when the three layers do not have exactly 6 connected input slots between
+// them, when inputInfos does not end up holding exactly 4 entries, or when no output was
+// collected. Returns true otherwise.
+//
+// NOTE: this function modifies the graph. When the 3rd or 4th collected input can be collapsed
+// to a single dimension and is produced by a Constant layer, that layer's output TensorInfo and
+// stored tensor handle are rewritten with the collapsed shape. This happens before the count
+// checks and is not undone if the function then returns false.
+template<typename Type>
+bool BuildAddMulAddTensorInfoLists(Type* layerList[4],
+ unsigned int& numInputs,
+ unsigned int& numOutputs,
+ std::vector<TensorInfo>& inputInfos,
+ std::vector<TensorInfo>& outputInfos,
+ const ActivationDescriptor*& activationDescriptor,
+ bool& fuseReLu)
+{
+ ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[0]);
+ ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[1]);
+ ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[2]);
+
+ ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[0], BinaryOperation::Add));
+ ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[1], BinaryOperation::Mul));
+ ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[2], BinaryOperation::Add));
+
+ // A fourth entry means an activation follows the sequence and is to be fused in
+ fuseReLu = (layerList[3] != nullptr);
+ if (fuseReLu)
+ {
+ activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
+ ARMNN_THROW_INVALIDARG_IF_FALSE((activationDescriptor->m_Function == ActivationFunction::ReLu) ||
+ (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
+ }
+
+ numInputs = 0;
+ numOutputs = 0;
+
+ // Ensure that there are 6 input slots in the add/mul/add layers
+ // we are going to replace
+ unsigned int layerIdx = 0;
+ unsigned int inputSlotCount = 0;
+ for (layerIdx = 0; layerIdx < 3; ++layerIdx)
+ {
+ for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
+ {
+ InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
+ OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+ // Unconnected input slots are neither counted nor collected
+ if (outputSlot)
+ {
+ if (layerIdx == 0)
+ {
+ // Always count the input connections of the first add
+ inputInfos.push_back(inputSlot->GetTensorInfo());
+ numInputs++;
+ }
+ else
+ {
+ // For subsequent layers, we skip connections to the previous layers in the counting
+ if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
+ {
+ TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
+ // numInputs of 2 or 3 means this is the 3rd or 4th input being collected
+ if (numInputs == 2 || numInputs == 3)
+ {
+ // Workaround the broadcast optimization to collapse shapes such as
+ // [1, 1, 1, 2] to [2] as required by backend
+ if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
+ {
+ // Same slot as outputSlot above, which is already known to be non-null
+ OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
+ if (previousLayerSlot)
+ {
+ if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
+ {
+ // First update the TensorInfo in the constant owning layer
+ previousLayerSlot->SetTensorInfo(inputSlotInfo);
+ // Then update the TensorInfo in the workload for the owning layer
+ ConstantLayer* layer = PolymorphicDowncast<ConstantLayer*>(
+ &previousLayerSlot->GetOwningLayer());
+ layer->m_LayerOutput
+ = std::make_unique<ScopedTensorHandle>(
+ ConstTensor(inputSlotInfo,
+ layer->m_LayerOutput.get()->GetConstTensor<void>()));
+ }
+ }
+ }
+ }
+ // The collapsed info is used when the collapse succeeded, whether or not the
+ // producer was a Constant layer
+ inputInfos.push_back(inputSlotInfo);
+ numInputs++;
+ }
+ }
+ inputSlotCount++;
+ }
+ }
+ }
+
+ // Check the input counts
+ bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
+ if (! validInputCount)
+ {
+ return false;
+ }
+
+ // Include the activation layer in the output scan only when it is being fused
+ const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
+ for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
+ {
+ for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
+ {
+ OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
+
+ for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
+ {
+ InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
+ if (layerIdx < (maxIdx-1))
+ {
+ // Not the last layer: only connections that bypass the next layer in the sequence are outputs
+ if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
+ {
+ outputInfos.push_back(outputSlot->GetTensorInfo());
+ numOutputs++;
+ }
+ }
+ // Last layer: every connection is an output.
+ // NOTE(review): layerList[layerIdx] has already been dereferenced above, so this check cannot fail here
+ else if (layerList[layerIdx] != nullptr)
+ {
+ outputInfos.push_back(outputSlot->GetTensorInfo());
+ numOutputs++;
+ }
+ }
+ }
+ }
+
+ // Check the output count
+ bool validOutputCount = (outputInfos.size() > 0);
+ if (! validOutputCount)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+}
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index c9dd1ff507..658d718b19 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -1726,9 +1726,11 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Te
// AddMulAdd
ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd2OutputsFloat32, AddMulAddTest<DataType::Float32>, true)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd2OutputsInt8, AddMulAddTest<DataType::QAsymmS8>, true)
ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd2OutputsUint8, AddMulAddTest<DataType::QAsymmU8>, true)
ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd1OutputFloat32, AddMulAddTest<DataType::Float32>, false)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd1OutputInt8, AddMulAddTest<DataType::QAsymmS8>, false)
ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd1OutputUint8, AddMulAddTest<DataType::QAsymmU8>, false)
#if defined(ARMNNREF_ENABLED)