about | summary | refs | log | tree | commit | diff
path: root/src/backends/neon/NeonBackend.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/neon/NeonBackend.cpp')
-rw-r--r-- src/backends/neon/NeonBackend.cpp 246
1 files changed, 245 insertions, 1 deletions
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 9862ddbd70..150bc345db 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -11,7 +11,10 @@
#include "NeonTensorHandleFactory.hpp"
#include <armnn/BackendRegistry.hpp>
+#include <armnn/Descriptors.hpp>
+#include <aclCommon/ArmComputeSubgraphUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
#include <armnn/backends/IBackendContext.hpp>
@@ -19,8 +22,18 @@
#include <armnn/utility/PolymorphicDowncast.hpp>
+#include "workloads/NeonAdditionWorkload.hpp"
+#include "workloads/NeonBatchNormalizationWorkload.hpp"
+#include "workloads/NeonConvolution2dWorkload.hpp"
+#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
+#include "workloads/NeonDivisionWorkload.hpp"
+#include "workloads/NeonFullyConnectedWorkload.hpp"
+#include "workloads/NeonMultiplicationWorkload.hpp"
+#include "workloads/NeonSubtractionWorkload.hpp"
+
#include <Optimizer.hpp>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Allocator.h>
namespace armnn
@@ -122,7 +135,238 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
{
// Looks for "layer -> Activation" pairs in the subgraph and tries to fuse the activation into the layer.
//
// The layers of `subgraph` are visited from last to first. A layer is a fusion candidate when its type is
// one of DepthwiseConvolution2d, Convolution2d, BatchNormalization, FullyConnected, Addition,
// Multiplication, Subtraction or Division, and it has no ActivationDescriptor stored as additional
// information. For every output slot of such a layer that has exactly one connection, and whose connected
// layer is an Activation, the matching Neon*Validate function is called with the activation descriptor;
// when the returned status tests true, the corresponding Fuse* helper is invoked with `optimizationViews`
// (presumably registering a substitution there -- the helpers are defined outside this file).
//
// Returns the OptimizationViews. If it holds no substitutions, the whole subgraph is added as untouched.
OptimizationViews optimizationViews;
- optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ auto it = subgraph.end(); // start one past the last layer; the loop below walks backwards
+
+ while (it != subgraph.begin())
+ {
+ --it;
+ Layer& base = **it; // candidate layer that an activation may be fused into
+
+ if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
+ || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
+ || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
+ || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+ && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr)) // skip layers that already carry an ActivationDescriptor
+ {
+ for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
+ {
+ if (output->GetNumConnections() == 1) // only consider outputs with a single consumer
+ {
+ for (auto&& childInput : output->GetConnections())
+ {
+ if (childInput->GetOwningLayer().GetType() == LayerType::Activation)
+ {
+ Layer& child = childInput->GetOwningLayer();
+
+ auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
+
+ const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
+ base.GetName(); // name handed to the Fuse* helpers below
+
+ // Get params from activation layer
+ ActivationDescriptor activationDesc = activationLayer->GetParameters();
+
+ if (base.GetType() == LayerType::Convolution2d)
+ {
+ Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases; // stays empty when the layer has bias disabled
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = NeonConvolution2dWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), // tensor info of the slot feeding the activation
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ false,
+ &activationDesc);
+
+ if (status) // NOTE(review): relies on arm_compute::Status converting to true on success -- confirm
+ {
+ FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::DepthwiseConvolution2d)
+ {
+ DepthwiseConvolution2dLayer* baseLayer =
+ PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases; // stays empty when the layer has bias disabled
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::FullyConnected)
+ {
+ FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
+
+ arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ baseLayer->m_Bias->GetTensorInfo(), // NOTE(review): m_Bias is dereferenced without the m_BiasEnabled check used by the convolution branches -- confirm it cannot be null here
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::BatchNormalization)
+ {
+ BatchNormalizationLayer* baseLayer =
+ PolymorphicDowncast<BatchNormalizationLayer*>(&base);
+
+ arm_compute::Status status = NeonBatchNormalizationValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Mean->GetTensorInfo(),
+ baseLayer->m_Variance->GetTensorInfo(),
+ baseLayer->m_Beta->GetTensorInfo(),
+ baseLayer->m_Gamma->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ BatchNormalizationLayer* replacementLayer =
+ FuseLayerWithParameters<BatchNormalizationLayer>(
+ optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+
+ replacementLayer->m_Beta = std::move(baseLayer->m_Beta); // hand the four constant tensors over to the replacement layer
+ replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
+ replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
+ replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
+ }
+ }
+ else if (base.GetType() == LayerType::Addition)
+ {
+ AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
+
+ arm_compute::Status status = NeonAdditionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), // first operand
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), // second operand
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Division)
+ {
+ DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
+
+ arm_compute::Status status = NeonDivisionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Multiplication)
+ {
+ MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
+
+ arm_compute::Status status = NeonMultiplicationWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Subtraction)
+ {
+ SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
+
+ arm_compute::Status status = NeonSubtractionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (optimizationViews.GetSubstitutions().empty()) // NOTE(review): untouched layers are only reported when nothing was fused -- confirm partially fused subgraphs need no untouched entries
+ {
+ optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ }
return optimizationViews;
}