path: root/src/backends/neon/workloads/NeonDivisionWorkload.cpp
author    Mike Kelly <mike.kelly@arm.com>    2020-11-12 10:58:48 +0000
committer Jim Flynn <jim.flynn@arm.com>      2020-11-13 14:25:30 +0000
commit    07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree      8becef8453674822d079815b06ae37310b97d2cf /src/backends/neon/workloads/NeonDivisionWorkload.cpp
parent    8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
IVGCVSW-5328-5329 Fuse Activation
* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads that support it.
* Changed ProfilingTest network to be a Convolution layer followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.

Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
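For context, the fusing rests on translating ArmNN's ActivationDescriptor into an arm_compute::ActivationLayerInfo that the NEON/CL layer applies in the same kernel. Below is a minimal sketch of such a conversion, assuming only the public ArmNN and ACL types; the real helper is ConvertActivationDescriptorToAclActivationLayerInfo in aclCommon/ArmComputeUtils.hpp, and the function name and the two cases shown here are illustrative, not the actual implementation.

    // Sketch only: map an ArmNN ActivationDescriptor onto an ACL ActivationLayerInfo.
    // A null descriptor means "no fused activation" and yields a default (disabled) info.
    #include <armnn/Descriptors.hpp>
    #include <arm_compute/core/Types.h>

    arm_compute::ActivationLayerInfo SketchConvertActivation(const armnn::ActivationDescriptor* desc)
    {
        if (desc == nullptr)
        {
            return arm_compute::ActivationLayerInfo(); // activation disabled
        }

        using AclActivation = arm_compute::ActivationLayerInfo::ActivationFunction;
        switch (desc->m_Function)
        {
            case armnn::ActivationFunction::ReLu:
                return arm_compute::ActivationLayerInfo(AclActivation::RELU);
            case armnn::ActivationFunction::BoundedReLu:
                // In ArmNN's descriptor m_A is the upper bound and m_B the lower bound.
                return arm_compute::ActivationLayerInfo(AclActivation::LU_BOUNDED_RELU, desc->m_A, desc->m_B);
            default:
                return arm_compute::ActivationLayerInfo(); // other functions omitted in this sketch
        }
    }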
Diffstat (limited to 'src/backends/neon/workloads/NeonDivisionWorkload.cpp')
-rw-r--r--    src/backends/neon/workloads/NeonDivisionWorkload.cpp    20
1 file changed, 15 insertions(+), 5 deletions(-)
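Before the diff itself, a short usage sketch of the extended validate signature introduced below. The tensor shape, data type and bounded-ReLU parameters are made-up example values, and SketchIsFusedDivisionSupported is a hypothetical helper, not part of the commit.

    // Sketch only: ask the NEON backend whether a division with a fused bounded ReLU is supported.
    #include <armnn/Descriptors.hpp>
    #include <armnn/Tensor.hpp>
    #include <arm_compute/core/Error.h>
    #include "NeonDivisionWorkload.hpp" // lives under src/backends/neon/workloads

    bool SketchIsFusedDivisionSupported()
    {
        const armnn::TensorShape shape({2, 4});
        const armnn::TensorInfo input0(shape, armnn::DataType::Float32);
        const armnn::TensorInfo input1(shape, armnn::DataType::Float32);
        const armnn::TensorInfo output(shape, armnn::DataType::Float32);

        armnn::ActivationDescriptor activation;
        activation.m_Function = armnn::ActivationFunction::BoundedReLu;
        activation.m_A = 6.0f; // upper bound
        activation.m_B = 0.0f; // lower bound

        const arm_compute::Status status =
            armnn::NeonDivisionWorkloadValidate(input0, input1, output, &activation);
        return status.error_code() == arm_compute::ErrorCode::OK;
    }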
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index fc353f136d..1a26d9510a 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -6,23 +6,31 @@
#include "NeonDivisionWorkload.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
namespace armnn
{
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo& input0,
- const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEElementwiseDivision::validate(&aclInput0,
- &aclInput1,
- &aclOutput);
+ &aclInput1,
+ &aclOutput,
+ activationInfo);
}
NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descriptor,
@@ -35,7 +43,9 @@ NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descri
arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_DivLayer.configure(&input0, &input1, &output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ m_DivLayer.configure(&input0, &input1, &output, activationInfo);
}
void NeonDivisionWorkload::Execute() const