author     Mike Kelly <mike.kelly@arm.com>    2020-11-12 10:58:48 +0000
committer  Jim Flynn <jim.flynn@arm.com>      2020-11-13 14:25:30 +0000
commit     07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree       8becef8453674822d079815b06ae37310b97d2cf /src/backends/neon
parent     8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
download   armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz
IVGCVSW-5328-5329 Fuse Activation
* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads that support it.
* Changed ProfilingTest network to be a Convolution layer followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.

Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
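In outline, each fusion validates the candidate (layer, activation) pair against the Arm Compute Library before substituting it. A minimal sketch of the pattern, condensed from the NeonBackend.cpp hunk below (Addition case shown; the surrounding traversal, error handling, and the other layer types are omitted):

    // When a fusable layer's single output feeds an Activation layer,
    // validate the fused configuration with ACL before touching the graph.
    ActivationDescriptor activationDesc = activationLayer->GetParameters();

    arm_compute::Status status = NeonAdditionWorkloadValidate(
        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
        &activationDesc);

    if (status) // only substitute when ACL accepts the fused configuration
    {
        FuseLayerWithoutParameters<AdditionLayer>(
            optimizationViews, baseLayer, activationLayer, activationDesc, name);
    }

If no substitution is made, the subgraph is returned untouched via AddUntouchedSubgraph, as the end of the hunk shows.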
Diffstat (limited to 'src/backends/neon')
-rw-r--r--  src/backends/neon/NeonBackend.cpp                                 | 246
-rw-r--r--  src/backends/neon/NeonLayerSupport.cpp                            |  27
-rw-r--r--  src/backends/neon/workloads/NeonAdditionWorkload.cpp              |  15
-rw-r--r--  src/backends/neon/workloads/NeonAdditionWorkload.hpp              |   4
-rw-r--r--  src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp    |  17
-rw-r--r--  src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp    |   3
-rw-r--r--  src/backends/neon/workloads/NeonConvolution2dWorkload.cpp         |  15
-rw-r--r--  src/backends/neon/workloads/NeonConvolution2dWorkload.hpp         |   3
-rw-r--r--  src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp  |  25
-rw-r--r--  src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp  |   4
-rw-r--r--  src/backends/neon/workloads/NeonDivisionWorkload.cpp              |  20
-rw-r--r--  src/backends/neon/workloads/NeonDivisionWorkload.hpp              |   5
-rw-r--r--  src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp        |  16
-rw-r--r--  src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp        |   3
-rw-r--r--  src/backends/neon/workloads/NeonMultiplicationWorkload.cpp        |  16
-rw-r--r--  src/backends/neon/workloads/NeonMultiplicationWorkload.hpp        |   4
-rw-r--r--  src/backends/neon/workloads/NeonSubtractionWorkload.cpp           |  17
-rw-r--r--  src/backends/neon/workloads/NeonSubtractionWorkload.hpp           |   4
18 files changed, 389 insertions, 55 deletions
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 9862ddbd70..150bc345db 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -11,7 +11,10 @@
#include "NeonTensorHandleFactory.hpp"
#include <armnn/BackendRegistry.hpp>
+#include <armnn/Descriptors.hpp>
+#include <aclCommon/ArmComputeSubgraphUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
#include <armnn/backends/IBackendContext.hpp>
@@ -19,8 +22,18 @@
#include <armnn/utility/PolymorphicDowncast.hpp>
+#include "workloads/NeonAdditionWorkload.hpp"
+#include "workloads/NeonBatchNormalizationWorkload.hpp"
+#include "workloads/NeonConvolution2dWorkload.hpp"
+#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
+#include "workloads/NeonDivisionWorkload.hpp"
+#include "workloads/NeonFullyConnectedWorkload.hpp"
+#include "workloads/NeonMultiplicationWorkload.hpp"
+#include "workloads/NeonSubtractionWorkload.hpp"
+
#include <Optimizer.hpp>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Allocator.h>
namespace armnn
@@ -122,7 +135,238 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
{
OptimizationViews optimizationViews;
- optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ auto it = subgraph.end();
+
+ while (it != subgraph.begin())
+ {
+ --it;
+ Layer& base = **it;
+
+ if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
+ || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
+ || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
+ || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+ && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
+ {
+ for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
+ {
+ if (output->GetNumConnections() == 1)
+ {
+ for (auto&& childInput : output->GetConnections())
+ {
+ if (childInput->GetOwningLayer().GetType() == LayerType::Activation)
+ {
+ Layer& child = childInput->GetOwningLayer();
+
+ auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
+
+ const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
+ base.GetName();
+
+ // Get params from activation layer
+ ActivationDescriptor activationDesc = activationLayer->GetParameters();
+
+ if (base.GetType() == LayerType::Convolution2d)
+ {
+ Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases;
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = NeonConvolution2dWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ false,
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::DepthwiseConvolution2d)
+ {
+ DepthwiseConvolution2dLayer* baseLayer =
+ PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases;
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::FullyConnected)
+ {
+ FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
+
+ arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ baseLayer->m_Bias->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::BatchNormalization)
+ {
+ BatchNormalizationLayer* baseLayer =
+ PolymorphicDowncast<BatchNormalizationLayer*>(&base);
+
+ arm_compute::Status status = NeonBatchNormalizationValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Mean->GetTensorInfo(),
+ baseLayer->m_Variance->GetTensorInfo(),
+ baseLayer->m_Beta->GetTensorInfo(),
+ baseLayer->m_Gamma->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ BatchNormalizationLayer* replacementLayer =
+ FuseLayerWithParameters<BatchNormalizationLayer>(
+ optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+
+ replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
+ replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
+ replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
+ replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
+ }
+ }
+ else if (base.GetType() == LayerType::Addition)
+ {
+ AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
+
+ arm_compute::Status status = NeonAdditionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Division)
+ {
+ DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
+
+ arm_compute::Status status = NeonDivisionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Multiplication)
+ {
+ MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
+
+ arm_compute::Status status = NeonMultiplicationWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Subtraction)
+ {
+ SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
+
+ arm_compute::Status status = NeonSubtractionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (optimizationViews.GetSubstitutions().empty())
+ {
+ optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ }
return optimizationViews;
}
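The remaining hunks repeat one shape per workload: the Validate helper and the workload constructor gain activation awareness, while the trailing ActivationDescriptor* parameter defaults to nullptr so existing call sites compile and behave as before. A hedged caller-side illustration (assuming, as the unchanged NeonLayerSupport call sites below imply, that a null descriptor converts to an empty arm_compute::ActivationLayerInfo):

    // Unfused layer-support query: behaviour identical to before this patch.
    arm_compute::Status plain =
        NeonAdditionWorkloadValidate(input0, input1, output, nullptr);

    // Fused query from OptimizeSubgraphView: ACL validates the activation too,
    // so unsupported fused combinations are rejected up front.
    ActivationDescriptor reluDesc;
    reluDesc.m_Function = ActivationFunction::ReLu;
    arm_compute::Status fused =
        NeonAdditionWorkloadValidate(input0, input1, output, &reluDesc);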
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 0084dbd03f..f55d1c8df6 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -167,7 +167,8 @@ bool NeonLayerSupport::IsAdditionSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool NeonLayerSupport::IsArgMinMaxSupported(const TensorInfo& input,
@@ -199,7 +200,8 @@ bool NeonLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
var,
beta,
gamma,
- descriptor);
+ descriptor,
+ nullptr);
}
bool NeonLayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input,
@@ -345,7 +347,8 @@ bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
descriptor,
weights,
biases,
- isFastMathEnabled);
+ isFastMathEnabled,
+ nullptr);
}
bool NeonLayerSupport::IsDepthToSpaceSupported(const TensorInfo& input,
@@ -373,7 +376,8 @@ bool NeonLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
output,
descriptor,
weights,
- biases);
+ biases,
+ nullptr);
}
bool NeonLayerSupport::IsDequantizeSupported(const TensorInfo& input,
@@ -399,7 +403,8 @@ bool NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(const TensorInfo&
output,
descriptor,
weights,
- biases);
+ biases,
+ nullptr);
}
bool NeonLayerSupport::IsElementwiseUnarySupported(const TensorInfo& input,
@@ -474,7 +479,8 @@ bool NeonLayerSupport::IsFullyConnectedSupported(const TensorInfo& input,
output,
weights,
biases,
- descriptor);
+ descriptor,
+ nullptr);
}
bool NeonLayerSupport::IsGatherSupported(const TensorInfo& input0,
@@ -611,7 +617,8 @@ bool NeonLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool NeonLayerSupport::IsDivisionSupported(const TensorInfo& input0,
@@ -623,7 +630,8 @@ bool NeonLayerSupport::IsDivisionSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool NeonLayerSupport::IsNormalizationSupported(const TensorInfo& input,
@@ -911,7 +919,8 @@ bool NeonLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool NeonLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input,
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index cb0c8a471f..9300b317a9 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -7,6 +7,8 @@
#include "NeonWorkloadUtils.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
@@ -17,16 +19,21 @@ namespace armnn
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEArithmeticAddition::validate(&aclInput0,
&aclInput1,
&aclOutput,
- arm_compute::ConvertPolicy::SATURATE);
+ arm_compute::ConvertPolicy::SATURATE,
+ activationInfo);
}
@@ -40,8 +47,10 @@ NeonAdditionWorkload::NeonAdditionWorkload(const AdditionQueueDescriptor& descri
arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto layer = std::make_unique<arm_compute::NEArithmeticAddition>();
- layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+ layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE, activationInfo);
m_AddLayer.reset(layer.release());
}
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.hpp b/src/backends/neon/workloads/NeonAdditionWorkload.hpp
index 826fb1f3dd..8e43cbdb6d 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.hpp
@@ -8,6 +8,7 @@
#include <backendsCommon/Workload.hpp>
#include <arm_compute/core/Error.h>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/IFunction.h>
namespace armnn
@@ -15,7 +16,8 @@ namespace armnn
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonAdditionWorkload : public BaseWorkload<AdditionQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
index ff777dbf9b..33480faf69 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
@@ -8,7 +8,10 @@
#include "NeonWorkloadUtils.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
#include <arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h>
@@ -24,7 +27,8 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor& descriptor)
+ const BatchNormalizationDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo =
armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
@@ -39,13 +43,17 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
const arm_compute::TensorInfo aclGammaInfo =
armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
&aclOutputInfo,
&aclMeanInfo,
&aclVarInfo,
&aclBetaInfo,
&aclGammaInfo,
- descriptor.m_Eps);
+ descriptor.m_Eps,
+ activationInfo);
}
NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
@@ -73,6 +81,8 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
m_Beta = std::make_unique<arm_compute::Tensor>();
BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto layer = std::make_unique<arm_compute::NEBatchNormalizationLayer>();
layer->configure(&input,
&output,
@@ -80,7 +90,8 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
m_Variance.get(),
m_Beta.get(),
m_Gamma.get(),
- m_Data.m_Parameters.m_Eps);
+ m_Data.m_Parameters.m_Eps,
+ activationInfo);
m_Layer.reset(layer.release());
InitializeArmComputeTensorData(*m_Mean, m_Data.m_Mean);
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
index 3619ea0d73..fea778fb1c 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor& descriptor);
+ const BatchNormalizationDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonBatchNormalizationWorkload : public BaseWorkload<BatchNormalizationQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index af6f1aee78..fd8be17dfd 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -6,6 +6,7 @@
#include "NeonConvolution2dWorkload.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
@@ -25,7 +26,8 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases,
- bool isFastMathEnabled)
+ bool isFastMathEnabled,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -47,6 +49,9 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
optionalAclBiasesInfo,
@@ -54,7 +59,7 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
layerInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
}
@@ -92,6 +97,8 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(m_Data.m_Parameters.m_DilationX,
m_Data.m_Parameters.m_DilationY);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager);
convolutionLayer->configure(&input,
m_KernelTensor.get(),
@@ -100,7 +107,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
padStrideInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
m_ConvolutionMethod =
@@ -110,7 +117,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
padStrideInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
m_ConvolutionLayer.reset(convolutionLayer.release());
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 860d78ba7e..4b6e58ce41 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases,
- bool isFastMathEnabled = false);
+ bool isFastMathEnabled = false,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index a9a3c75bfd..db6bcc3ecb 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -10,6 +10,7 @@
#include <armnnUtils/DataLayoutIndexed.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <neon/NeonLayerSupport.hpp>
@@ -29,7 +30,8 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const Optional<TensorInfo>& biases,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -59,13 +61,16 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
descriptor.m_DilationX,descriptor.m_DilationY);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
optionalAclBiasesInfo,
&aclOutputInfo,
aclPadStrideInfo,
aclDepthMultiplier,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
aclDilationInfo);
}
@@ -116,16 +121,18 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
m_pDepthwiseConvolutionLayer.get())->configure(&input,
- m_KernelTensor.get(),
- m_BiasTensor.get(),
- &output,
- padStrideInfo,
- depthMultiplier,
- arm_compute::ActivationLayerInfo(),
- aclDilationInfo);
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo,
+ depthMultiplier,
+ activationInfo,
+ aclDilationInfo);
ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
index 85932d3f9a..d257b91638 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
- const Optional<TensorInfo>& biases);
+ const Optional<TensorInfo>& biases,
+ const ActivationDescriptor* activationDescriptor
+ = nullptr);
class NeonDepthwiseConvolutionWorkload : public BaseWorkload<DepthwiseConvolution2dQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index fc353f136d..1a26d9510a 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -6,23 +6,31 @@
#include "NeonDivisionWorkload.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
namespace armnn
{
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo& input0,
- const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEElementwiseDivision::validate(&aclInput0,
- &aclInput1,
- &aclOutput);
+ &aclInput1,
+ &aclOutput,
+ activationInfo);
}
NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descriptor,
@@ -35,7 +43,9 @@ NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descri
arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_DivLayer.configure(&input0, &input1, &output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ m_DivLayer.configure(&input0, &input1, &output, activationInfo);
}
void NeonDivisionWorkload::Execute() const
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.hpp b/src/backends/neon/workloads/NeonDivisionWorkload.hpp
index 2405d9a4ab..fffe02fc00 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.hpp
@@ -13,8 +13,9 @@ namespace armnn
{
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo& input0,
- const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonDivisionWorkload : public BaseWorkload<DivisionQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index e808c60c0c..31489a0c32 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -6,9 +6,12 @@
#include "NeonFullyConnectedWorkload.hpp"
#include "NeonWorkloadUtils.hpp"
+
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
@@ -21,7 +24,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
const TensorInfo& biases,
- const FullyConnectedDescriptor& descriptor)
+ const FullyConnectedDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
@@ -36,8 +40,7 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
}
const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
-
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
&aclWeights,
@@ -64,9 +67,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
}
- // Construct
- arm_compute::FullyConnectedLayerInfo fc_info;
- fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ arm_compute::FullyConnectedLayerInfo fc_info =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
index 1cd8be109a..8dc7fdcd6c 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
const TensorInfo& biases,
- const FullyConnectedDescriptor& descriptor);
+ const FullyConnectedDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonFullyConnectedWorkload : public BaseWorkload<FullyConnectedQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index 6f78b8eacc..e4ed195922 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -7,6 +7,8 @@
#include "NeonWorkloadUtils.hpp"
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
@@ -16,7 +18,8 @@ namespace armnn
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
@@ -26,6 +29,9 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
arm_compute::ConvertPolicy::SATURATE :
arm_compute::ConvertPolicy::WRAP;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
// At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
// when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
// ignored for F32 tensors.
@@ -34,7 +40,8 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
&aclOutput,
1.0f,
convertPolicy,
- arm_compute::RoundingPolicy::TO_ZERO);
+ arm_compute::RoundingPolicy::TO_ZERO,
+ activationInfo);
}
NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
@@ -52,6 +59,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
arm_compute::ConvertPolicy::SATURATE :
arm_compute::ConvertPolicy::WRAP;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
// At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
// when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
// ignored for F32 tensors.
@@ -61,7 +70,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
&output,
1.0f,
convertPolicy,
- arm_compute::RoundingPolicy::TO_ZERO);
+ arm_compute::RoundingPolicy::TO_ZERO,
+ activationInfo);
m_PixelWiseMultiplication.reset(layer.release());
}
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
index bfbaf776c1..d2bcd04482 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
@@ -8,6 +8,7 @@
#include <backendsCommon/Workload.hpp>
#include <arm_compute/core/Error.h>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/IFunction.h>
#include <memory>
@@ -16,7 +17,8 @@ namespace armnn
{
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
index ccc2bfe58b..21f0f6fa41 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
@@ -6,8 +6,12 @@
#include "NeonSubtractionWorkload.hpp"
#include "NeonWorkloadUtils.hpp"
+
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
@@ -17,16 +21,21 @@ namespace armnn
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
&aclInput1,
&aclOutput,
- arm_compute::ConvertPolicy::SATURATE);
+ arm_compute::ConvertPolicy::SATURATE,
+ activationInfo);
}
NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
@@ -39,8 +48,10 @@ NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescripto
arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto layer = std::make_unique<arm_compute::NEArithmeticSubtraction>();
- layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+ layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE, activationInfo);
m_SubLayer.reset(layer.release());
}
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.hpp b/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
index 3326f8bf4a..19d0811a18 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
@@ -8,6 +8,7 @@
#include <backendsCommon/Workload.hpp>
#include <arm_compute/core/Error.h>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/IFunction.h>
#include <memory>
@@ -17,7 +18,8 @@ namespace armnn
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonSubtractionWorkload : public BaseWorkload<SubtractionQueueDescriptor>
{