diff options
Diffstat (limited to 'src/backends/neon/NeonBackend.cpp')
-rw-r--r-- | src/backends/neon/NeonBackend.cpp | 246 |
1 files changed, 245 insertions, 1 deletions
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index 9862ddbd70..150bc345db 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -11,7 +11,10 @@ #include "NeonTensorHandleFactory.hpp" #include <armnn/BackendRegistry.hpp> +#include <armnn/Descriptors.hpp> +#include <aclCommon/ArmComputeSubgraphUtils.hpp> +#include <aclCommon/ArmComputeUtils.hpp> #include <aclCommon/BaseMemoryManager.hpp> #include <armnn/backends/IBackendContext.hpp> @@ -19,8 +22,18 @@ #include <armnn/utility/PolymorphicDowncast.hpp> +#include "workloads/NeonAdditionWorkload.hpp" +#include "workloads/NeonBatchNormalizationWorkload.hpp" +#include "workloads/NeonConvolution2dWorkload.hpp" +#include "workloads/NeonDepthwiseConvolutionWorkload.hpp" +#include "workloads/NeonDivisionWorkload.hpp" +#include "workloads/NeonFullyConnectedWorkload.hpp" +#include "workloads/NeonMultiplicationWorkload.hpp" +#include "workloads/NeonSubtractionWorkload.hpp" + #include <Optimizer.hpp> +#include <arm_compute/core/Types.h> #include <arm_compute/runtime/Allocator.h> namespace armnn @@ -122,7 +135,238 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph { OptimizationViews optimizationViews; - optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); + auto it = subgraph.end(); + + while (it != subgraph.begin()) + { + --it; + Layer& base = **it; + + if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d + || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected + || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication + || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division) + && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr)) + { + for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output) + { + if (output->GetNumConnections() == 1) + { + for (auto&& childInput : output->GetConnections()) + { + if (childInput->GetOwningLayer().GetType() == LayerType::Activation) + { + Layer& child = childInput->GetOwningLayer(); + + auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child); + + const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") + + base.GetName(); + + // Get params from activation layer + ActivationDescriptor activationDesc = activationLayer->GetParameters(); + + if (base.GetType() == LayerType::Convolution2d) + { + Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base); + + Optional<TensorInfo> biases; + + if (baseLayer->GetParameters().m_BiasEnabled) + { + biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(), + GetOptionalBiasTypeFromWeightsType( + baseLayer->m_Weight->GetTensorInfo().GetDataType())); + } + + arm_compute::Status status = NeonConvolution2dWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetParameters(), + baseLayer->m_Weight->GetTensorInfo(), + biases, + false, + &activationDesc); + + if (status) + { + FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::DepthwiseConvolution2d) + { + DepthwiseConvolution2dLayer* baseLayer = + PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base); + + Optional<TensorInfo> biases; + + if (baseLayer->GetParameters().m_BiasEnabled) + { + biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(), + GetOptionalBiasTypeFromWeightsType( + baseLayer->m_Weight->GetTensorInfo().GetDataType())); + } + + arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetParameters(), + baseLayer->m_Weight->GetTensorInfo(), + biases, + &activationDesc); + + if (status) + { + FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::FullyConnected) + { + FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base); + + arm_compute::Status status = NeonFullyConnectedWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->m_Weight->GetTensorInfo(), + baseLayer->m_Bias->GetTensorInfo(), + baseLayer->GetParameters(), + &activationDesc); + + if (status) + { + FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::BatchNormalization) + { + BatchNormalizationLayer* baseLayer = + PolymorphicDowncast<BatchNormalizationLayer*>(&base); + + arm_compute::Status status = NeonBatchNormalizationValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->m_Mean->GetTensorInfo(), + baseLayer->m_Variance->GetTensorInfo(), + baseLayer->m_Beta->GetTensorInfo(), + baseLayer->m_Gamma->GetTensorInfo(), + baseLayer->GetParameters(), + &activationDesc); + + if (status) + { + BatchNormalizationLayer* replacementLayer = + FuseLayerWithParameters<BatchNormalizationLayer>( + optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + + replacementLayer->m_Beta = std::move(baseLayer->m_Beta); + replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma); + replacementLayer->m_Mean = std::move(baseLayer->m_Mean); + replacementLayer->m_Variance = std::move(baseLayer->m_Variance); + } + } + else if (base.GetType() == LayerType::Addition) + { + AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base); + + arm_compute::Status status = NeonAdditionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<AdditionLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::Division) + { + DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base); + + arm_compute::Status status = NeonDivisionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<DivisionLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::Multiplication) + { + MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base); + + arm_compute::Status status = NeonMultiplicationWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::Subtraction) + { + SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base); + + arm_compute::Status status = NeonSubtractionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + } + } + } + } + } + } + + if (optimizationViews.GetSubstitutions().empty()) + { + optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); + } return optimizationViews; } |