From 2ab3032ae39d129ef79115cad9084a7074606b59 Mon Sep 17 00:00:00 2001
From: Teresa Charlin
Date: Thu, 29 Feb 2024 14:59:30 +0000
Subject: IVGCVSW-8212 Bug Fix: AddMulAdd optimization missing check on second ADD

* Failure was: MultiplicationQueueDescriptor: Tensors input_0 & input_1 must
  have the same number of dimensions in order to be broadcasted
* When trying to apply AddMulAdd, the Mul dimensions were being collapsed
  before checking the second ADD.
* If the second ADD determines that fusion cannot happen, the MUL dimensions
  should not have been collapsed.

Signed-off-by: Teresa Charlin
Change-Id: I6dda131c424e1bf7b22fff40c5bef8977f073b24
---
 src/backends/neon/NeonBackendOptimizationUtils.hpp | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/backends/neon/NeonBackendOptimizationUtils.hpp b/src/backends/neon/NeonBackendOptimizationUtils.hpp
index 3a8bf46599..34ab41f09c 100644
--- a/src/backends/neon/NeonBackendOptimizationUtils.hpp
+++ b/src/backends/neon/NeonBackendOptimizationUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //

@@ -100,6 +100,37 @@ bool BuildAddMulAddTensorInfoLists(Type* layerList[4],
     ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[1], BinaryOperation::Mul));
     ARMNN_THROW_INVALIDARG_IF_FALSE(IsSequenceLayerType(*layerList[2], BinaryOperation::Add));

+    auto is1D = [](const TensorInfo expanded)
+    {
+        TensorInfo collapsed;
+        if (CollapseLeadingUnitDimensions(expanded, collapsed))
+        {
+            return (collapsed.GetNumDimensions() == 1);
+        }
+        else
+        {
+            return (expanded.GetNumDimensions() == 1);
+        }
+    };
+
+    // One of the 2 inputs for MUL and the Second ADD must be 1D
+    // ref: clframework/src/cpu/kernels/CpuAddMulAddKernel.cpp
+    auto& mulLayer  = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[1]));
+    auto& add2Layer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[2]));
+
+    Layer& mulInput0  = mulLayer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
+    Layer& mulInput1  = mulLayer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
+    Layer& add2Input0 = add2Layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
+    Layer& add2Input1 = add2Layer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
+    if (!is1D(mulInput0.GetOutputSlot(0).GetTensorInfo()) && !is1D(mulInput1.GetOutputSlot(0).GetTensorInfo()))
+    {
+        return false;
+    }
+    if (!is1D(add2Input0.GetOutputSlot(0).GetTensorInfo()) && !is1D(add2Input1.GetOutputSlot(0).GetTensorInfo()))
+    {
+        return false;
+    }
+
     fuseReLu = (layerList[3] != nullptr);
     if (fuseReLu)
     {
-- 
cgit v1.2.1
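
Note (editor's addition, not part of the patch): the fix relies on the rule, referenced from CpuAddMulAddKernel.cpp, that one input of the MUL and of the second ADD must be 1D once its leading unit dimensions are collapsed, and the new is1D lambda now checks this before any shapes are modified, so a rejected fusion leaves the MUL tensors untouched. The standalone C++ sketch below only illustrates which shapes qualify; IsEffectively1D is a hypothetical helper written for illustration and does not use the ArmNN API (TensorInfo, CollapseLeadingUnitDimensions).

// Standalone illustration (hypothetical helper, not ArmNN code).
#include <cstddef>
#include <iostream>
#include <vector>

// Mirrors the idea behind the patch's is1D lambda: drop leading dimensions of
// size 1 and report whether exactly one dimension remains.
bool IsEffectively1D(const std::vector<unsigned int>& shape)
{
    std::size_t first = 0;
    while (first + 1 < shape.size() && shape[first] == 1)
    {
        ++first;
    }
    return (shape.size() - first) == 1;
}

int main()
{
    std::cout << std::boolalpha;
    std::cout << IsEffectively1D({1, 1, 1, 32}) << "\n"; // true:  collapses to [32], eligible for fusion
    std::cout << IsEffectively1D({1, 2, 1, 32}) << "\n"; // false: stays multi-dimensional, fusion rejected
    std::cout << IsEffectively1D({32}) << "\n";          // true:  already 1D
    return 0;
}

The point of doing this check up front, rather than during shape collapsing, is that the optimization can bail out of the fusion without having already rewritten the MUL input dimensions.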