IVGCVSW-5328-5329 Fuse Activation

* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads that support it.
* Changed ProfilingTest network to be a Convolution layer followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.

Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
author: Mike Kelly <mike.kelly@arm.com> 2020-11-12 10:58:48 +0000
committer: Jim Flynn <jim.flynn@arm.com> 2020-11-13 14:25:30 +0000
commit: 07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree: 8becef8453674822d079815b06ae37310b97d2cf /src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
parent: 8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
download: armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz
1 files changed, 13 insertions, 3 deletions
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index 6f78b8eacc..e4ed195922 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <aclCommon/ArmComputeUtils.hpp>
+
 #include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
@@ -16,7 +18,8 @@ namespace armnn
 
 arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
                                                        const TensorInfo& input1,
-                                                       const TensorInfo& output)
+                                                       const TensorInfo& output,
+                                                       const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
@@ -26,6 +29,9 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
                           arm_compute::ConvertPolicy::SATURATE :
                           arm_compute::ConvertPolicy::WRAP;
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
@@ -34,7 +40,8 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
                                                             &aclOutput,
                                                             1.0f,
                                                             convertPolicy,
-                                                            arm_compute::RoundingPolicy::TO_ZERO);
+                                                            arm_compute::RoundingPolicy::TO_ZERO,
+                                                            activationInfo);
 }
 
 NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
@@ -52,6 +59,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
                           arm_compute::ConvertPolicy::SATURATE :
                           arm_compute::ConvertPolicy::WRAP;
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
@@ -61,7 +70,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
                      &output,
                      1.0f,
                      convertPolicy,
-                     arm_compute::RoundingPolicy::TO_ZERO);
+                     arm_compute::RoundingPolicy::TO_ZERO,
+                     activationInfo);
     m_PixelWiseMultiplication.reset(layer.release());
 }
author	Mike Kelly <mike.kelly@arm.com>	2020-11-12 10:58:48 +0000
committer	Jim Flynn <jim.flynn@arm.com>	2020-11-13 14:25:30 +0000
commit	07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree	8becef8453674822d079815b06ae37310b97d2cf /src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
parent	8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
download	armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz