about · summary · refs · log · tree · commit · diff
path: root/src/backends/neon/workloads
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/neon/workloads')
-rw-r--r--  src/backends/neon/workloads/NeonAdditionWorkload.cpp             | 15
-rw-r--r--  src/backends/neon/workloads/NeonAdditionWorkload.hpp             |  4
-rw-r--r--  src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp   | 17
-rw-r--r--  src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp   |  3
-rw-r--r--  src/backends/neon/workloads/NeonConvolution2dWorkload.cpp        | 15
-rw-r--r--  src/backends/neon/workloads/NeonConvolution2dWorkload.hpp        |  3
-rw-r--r--  src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp | 25
-rw-r--r--  src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp |  4
-rw-r--r--  src/backends/neon/workloads/NeonDivisionWorkload.cpp             | 20
-rw-r--r--  src/backends/neon/workloads/NeonDivisionWorkload.hpp             |  5
-rw-r--r--  src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp       | 16
-rw-r--r--  src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp       |  3
-rw-r--r--  src/backends/neon/workloads/NeonMultiplicationWorkload.cpp       | 16
-rw-r--r--  src/backends/neon/workloads/NeonMultiplicationWorkload.hpp       |  4
-rw-r--r--  src/backends/neon/workloads/NeonSubtractionWorkload.cpp          | 17
-rw-r--r--  src/backends/neon/workloads/NeonSubtractionWorkload.hpp          |  4
16 files changed, 126 insertions, 45 deletions
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index cb0c8a471f..9300b317a9 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -7,6 +7,8 @@
#include "NeonWorkloadUtils.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
@@ -17,16 +19,21 @@ namespace armnn
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEArithmeticAddition::validate(&aclInput0,
&aclInput1,
&aclOutput,
- arm_compute::ConvertPolicy::SATURATE);
+ arm_compute::ConvertPolicy::SATURATE,
+ activationInfo);
}
@@ -40,8 +47,10 @@ NeonAdditionWorkload::NeonAdditionWorkload(const AdditionQueueDescriptor& descri
arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto layer = std::make_unique<arm_compute::NEArithmeticAddition>();
- layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+ layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE, activationInfo);
m_AddLayer.reset(layer.release());
}
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.hpp b/src/backends/neon/workloads/NeonAdditionWorkload.hpp
index 826fb1f3dd..8e43cbdb6d 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.hpp
@@ -8,6 +8,7 @@
#include <backendsCommon/Workload.hpp>
#include <arm_compute/core/Error.h>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/IFunction.h>
namespace armnn
@@ -15,7 +16,8 @@ namespace armnn
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonAdditionWorkload : public BaseWorkload<AdditionQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
index ff777dbf9b..33480faf69 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
@@ -8,7 +8,10 @@
#include "NeonWorkloadUtils.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
#include <arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h>
@@ -24,7 +27,8 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor& descriptor)
+ const BatchNormalizationDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo =
armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
@@ -39,13 +43,17 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
const arm_compute::TensorInfo aclGammaInfo =
armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
&aclOutputInfo,
&aclMeanInfo,
&aclVarInfo,
&aclBetaInfo,
&aclGammaInfo,
- descriptor.m_Eps);
+ descriptor.m_Eps,
+ activationInfo);
}
NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
@@ -73,6 +81,8 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
m_Beta = std::make_unique<arm_compute::Tensor>();
BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto layer = std::make_unique<arm_compute::NEBatchNormalizationLayer>();
layer->configure(&input,
&output,
@@ -80,7 +90,8 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
m_Variance.get(),
m_Beta.get(),
m_Gamma.get(),
- m_Data.m_Parameters.m_Eps);
+ m_Data.m_Parameters.m_Eps,
+ activationInfo);
m_Layer.reset(layer.release());
InitializeArmComputeTensorData(*m_Mean, m_Data.m_Mean);
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
index 3619ea0d73..fea778fb1c 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor& descriptor);
+ const BatchNormalizationDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonBatchNormalizationWorkload : public BaseWorkload<BatchNormalizationQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index af6f1aee78..fd8be17dfd 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -6,6 +6,7 @@
#include "NeonConvolution2dWorkload.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
@@ -25,7 +26,8 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases,
- bool isFastMathEnabled)
+ bool isFastMathEnabled,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -47,6 +49,9 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
optionalAclBiasesInfo,
@@ -54,7 +59,7 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
layerInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
}
@@ -92,6 +97,8 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(m_Data.m_Parameters.m_DilationX,
m_Data.m_Parameters.m_DilationY);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager);
convolutionLayer->configure(&input,
m_KernelTensor.get(),
@@ -100,7 +107,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
padStrideInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
m_ConvolutionMethod =
@@ -110,7 +117,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
padStrideInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
m_ConvolutionLayer.reset(convolutionLayer.release());
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 860d78ba7e..4b6e58ce41 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases,
- bool isFastMathEnabled = false);
+ bool isFastMathEnabled = false,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index a9a3c75bfd..db6bcc3ecb 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -10,6 +10,7 @@
#include <armnnUtils/DataLayoutIndexed.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <neon/NeonLayerSupport.hpp>
@@ -29,7 +30,8 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const Optional<TensorInfo>& biases,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -59,13 +61,16 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
descriptor.m_DilationX,descriptor.m_DilationY);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
optionalAclBiasesInfo,
&aclOutputInfo,
aclPadStrideInfo,
aclDepthMultiplier,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
aclDilationInfo);
}
@@ -116,16 +121,18 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
m_pDepthwiseConvolutionLayer.get())->configure(&input,
- m_KernelTensor.get(),
- m_BiasTensor.get(),
- &output,
- padStrideInfo,
- depthMultiplier,
- arm_compute::ActivationLayerInfo(),
- aclDilationInfo);
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo,
+ depthMultiplier,
+ activationInfo,
+ aclDilationInfo);
ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
index 85932d3f9a..d257b91638 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
- const Optional<TensorInfo>& biases);
+ const Optional<TensorInfo>& biases,
+ const ActivationDescriptor* activationDescriptor
+ = nullptr);
class NeonDepthwiseConvolutionWorkload : public BaseWorkload<DepthwiseConvolution2dQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index fc353f136d..1a26d9510a 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -6,23 +6,31 @@
#include "NeonDivisionWorkload.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
namespace armnn
{
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo& input0,
- const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEElementwiseDivision::validate(&aclInput0,
- &aclInput1,
- &aclOutput);
+ &aclInput1,
+ &aclOutput,
+ activationInfo);
}
NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descriptor,
@@ -35,7 +43,9 @@ NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descri
arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_DivLayer.configure(&input0, &input1, &output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ m_DivLayer.configure(&input0, &input1, &output, activationInfo);
}
void NeonDivisionWorkload::Execute() const
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.hpp b/src/backends/neon/workloads/NeonDivisionWorkload.hpp
index 2405d9a4ab..fffe02fc00 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.hpp
@@ -13,8 +13,9 @@ namespace armnn
{
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo& input0,
- const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonDivisionWorkload : public BaseWorkload<DivisionQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index e808c60c0c..31489a0c32 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -6,9 +6,12 @@
#include "NeonFullyConnectedWorkload.hpp"
#include "NeonWorkloadUtils.hpp"
+
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
@@ -21,7 +24,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
const TensorInfo& biases,
- const FullyConnectedDescriptor& descriptor)
+ const FullyConnectedDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
@@ -36,8 +40,7 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
}
const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
-
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
&aclWeights,
@@ -64,9 +67,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
}
- // Construct
- arm_compute::FullyConnectedLayerInfo fc_info;
- fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ arm_compute::FullyConnectedLayerInfo fc_info =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
index 1cd8be109a..8dc7fdcd6c 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
const TensorInfo& biases,
- const FullyConnectedDescriptor& descriptor);
+ const FullyConnectedDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonFullyConnectedWorkload : public BaseWorkload<FullyConnectedQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index 6f78b8eacc..e4ed195922 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -7,6 +7,8 @@
#include "NeonWorkloadUtils.hpp"
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
@@ -16,7 +18,8 @@ namespace armnn
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
@@ -26,6 +29,9 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
arm_compute::ConvertPolicy::SATURATE :
arm_compute::ConvertPolicy::WRAP;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
// At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
// when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
// ignored for F32 tensors.
@@ -34,7 +40,8 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
&aclOutput,
1.0f,
convertPolicy,
- arm_compute::RoundingPolicy::TO_ZERO);
+ arm_compute::RoundingPolicy::TO_ZERO,
+ activationInfo);
}
NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
@@ -52,6 +59,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
arm_compute::ConvertPolicy::SATURATE :
arm_compute::ConvertPolicy::WRAP;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
// At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
// when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
// ignored for F32 tensors.
@@ -61,7 +70,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
&output,
1.0f,
convertPolicy,
- arm_compute::RoundingPolicy::TO_ZERO);
+ arm_compute::RoundingPolicy::TO_ZERO,
+ activationInfo);
m_PixelWiseMultiplication.reset(layer.release());
}
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
index bfbaf776c1..d2bcd04482 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
@@ -8,6 +8,7 @@
#include <backendsCommon/Workload.hpp>
#include <arm_compute/core/Error.h>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/IFunction.h>
#include <memory>
@@ -16,7 +17,8 @@ namespace armnn
{
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
index ccc2bfe58b..21f0f6fa41 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
@@ -6,8 +6,12 @@
#include "NeonSubtractionWorkload.hpp"
#include "NeonWorkloadUtils.hpp"
+
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
@@ -17,16 +21,21 @@ namespace armnn
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
&aclInput1,
&aclOutput,
- arm_compute::ConvertPolicy::SATURATE);
+ arm_compute::ConvertPolicy::SATURATE,
+ activationInfo);
}
NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
@@ -39,8 +48,10 @@ NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescripto
arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto layer = std::make_unique<arm_compute::NEArithmeticSubtraction>();
- layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+ layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE, activationInfo);
m_SubLayer.reset(layer.release());
}
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.hpp b/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
index 3326f8bf4a..19d0811a18 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
@@ -8,6 +8,7 @@
#include <backendsCommon/Workload.hpp>
#include <arm_compute/core/Error.h>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/IFunction.h>
#include <memory>
@@ -17,7 +18,8 @@ namespace armnn
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class NeonSubtractionWorkload : public BaseWorkload<SubtractionQueueDescriptor>
{