IVGCVSW-5328-5329 Fuse Activation

* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads that support it.
* Changed ProfilingTest network to be a Convolution layer followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.

Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
author: Mike Kelly <mike.kelly@arm.com> 2020-11-12 10:58:48 +0000
committer: Jim Flynn <jim.flynn@arm.com> 2020-11-13 14:25:30 +0000
commit: 07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree: 8becef8453674822d079815b06ae37310b97d2cf /src/backends/aclCommon
parent: 8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
download: armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz
3 files changed, 185 insertions, 1 deletions
diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
new file mode 100644
index 0000000000..79744ecf97
--- /dev/null
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -0,0 +1,145 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/backends/OptimizationViews.hpp>
+
+namespace armnn
+{
+
+namespace
+{
+
+//
+// Gathers every input slot of the given layers; only valid if no layer those inputs connect to is selected.
+//
+SubgraphView::InputSlots CreateInputsFrom(const std::vector<Layer*>& layers)
+{
+    SubgraphView::InputSlots result;
+    for (auto&& layer : layers)
+    {
+        for (auto&& it = layer->BeginInputSlots(); it != layer->EndInputSlots(); ++it)
+        {
+            result.push_back(&(*it));
+        }
+    }
+    return result;
+}
+
+//
+// Gathers every output slot of the given layers; only valid if no layer those outputs connect to is selected.
+//
+SubgraphView::OutputSlots CreateOutputsFrom(const std::vector<Layer*>& layers)
+{
+    SubgraphView::OutputSlots result;
+    for (auto&& layer : layers)
+    {
+        for (auto&& it = layer->BeginOutputSlots(); it != layer->EndOutputSlots(); ++it)
+        {
+            result.push_back(&(*it));
+        }
+    }
+    return result;
+}
+
+} // namespace
+
+inline const TensorInfo GetOverriddenDataType(const TensorInfo& info, Optional<DataType> type)
+{
+    if (!type)
+    {
+        return info;
+    }
+
+    return TensorInfo(info.GetShape(), type.value(), info.GetQuantizationScale(), info.GetQuantizationOffset());
+}
+
+inline armnn::Optional<armnn::DataType> GetOptionalBiasTypeFromWeightsType(armnn::Optional<armnn::DataType> weightsType)
+{
+    if (!weightsType)
+    {
+        return weightsType;
+    }
+
+    switch(weightsType.value())
+    {
+        case armnn::DataType::BFloat16:
+        case armnn::DataType::Float16:
+        case armnn::DataType::Float32:
+            return weightsType;
+        case armnn::DataType::QAsymmS8:
+            return armnn::DataType::Signed32;
+        case armnn::DataType::QAsymmU8:
+            return armnn::DataType::Signed32;
+        case armnn::DataType::QSymmS16:
+            return armnn::DataType::Signed32;
+        default:
+            ARMNN_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type.");
+    }
+    return armnn::EmptyOptional();
+}
+
+template<typename LayerType>
+LayerType* FuseLayerWithoutParameters(OptimizationViews& optimizationViews,
+                                      LayerType* baseLayer,
+                                      ActivationLayer* activationLayer,
+                                      ActivationDescriptor& activationDesc,
+                                      std::string name)
+{
+    LayerType* replacementLayer = optimizationViews.GetGraph().AddLayer<LayerType>(name.c_str());
+
+    replacementLayer->SetAdditionalInfoForObject(std::make_shared<ActivationDescriptor>(activationDesc));
+
+    SubgraphView substitutionSubgraph(CreateInputsFrom({baseLayer}),
+                                      CreateOutputsFrom({activationLayer}),
+                                      {baseLayer, activationLayer});
+    SubgraphView replacementSubgraph(replacementLayer);
+
+    optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
+    return replacementLayer;
+}
+
+template<typename LayerType>
+LayerType* FuseLayerWithParameters(OptimizationViews& optimizationViews,
+                                   LayerType* baseLayer,
+                                   ActivationLayer* activationLayer,
+                                   ActivationDescriptor& activationDesc,
+                                   std::string name)
+{
+    LayerType* replacementLayer = optimizationViews.GetGraph().AddLayer<LayerType>(baseLayer->GetParameters(),
+                                                                                   name.c_str());
+
+    replacementLayer->SetAdditionalInfoForObject(std::make_shared<ActivationDescriptor>(activationDesc));
+
+    SubgraphView substitutionSubgraph(CreateInputsFrom({baseLayer}),
+                                      CreateOutputsFrom({activationLayer}),
+                                      {baseLayer, activationLayer});
+    SubgraphView replacementSubgraph(replacementLayer);
+
+    optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
+    return replacementLayer;
+}
+
+template<typename LayerType>
+LayerType* FuseLayerWithWeightsAndBiases(OptimizationViews& optimizationViews,
+                                         LayerType* baseLayer,
+                                         ActivationLayer* activationLayer,
+                                         ActivationDescriptor& activationDesc,
+                                         std::string name)
+{
+    LayerType* replacementLayer = FuseLayerWithParameters(optimizationViews,
+                                                          baseLayer,
+                                                          activationLayer,
+                                                          activationDesc,
+                                                          name);
+
+    replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
+    replacementLayer->m_Bias   = std::move(baseLayer->m_Bias);
+
+    return replacementLayer;
+}
+
+} // namespace armnn
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index 6b1f975350..adcf8281d2 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -9,6 +9,8 @@
 #include <armnn/utility/Assert.hpp>
 
 #include <arm_compute/core/Types.h>
+#include "../../../../clframework/arm_compute/core/Types.h"
+#include "../backendsCommon/WorkloadData.hpp"
 
 namespace armnn
 {
@@ -77,6 +79,30 @@ ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor&
         actDesc.m_A, actDesc.m_B);
 }
 
+inline arm_compute::ActivationLayerInfo
+ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor* activationDescPtr)
+{
+    if (activationDescPtr != nullptr)
+    {
+        return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
+                                                                           *activationDescPtr));
+    }
+    return arm_compute::ActivationLayerInfo();
+}
+
+inline arm_compute::ActivationLayerInfo
+ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor& queueDescriptor)
+{
+    const ActivationDescriptor* activationDescPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
+
+    if (activationDescPtr != nullptr)
+    {
+        return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
+                *activationDescPtr));
+    }
+    return arm_compute::ActivationLayerInfo();
+}
+
 inline arm_compute::ComparisonOperation ConvertComparisonOperationToAcl(const ComparisonDescriptor& descriptor)
 {
     switch (descriptor.m_Operation)
@@ -130,10 +156,22 @@ ConvertNormalizationAlgorithmChannelToAclNormType(NormalizationAlgorithmChannel
 }
 
 inline arm_compute::FullyConnectedLayerInfo
-ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc)
+ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc,
+                                                            const ActivationDescriptor* activationDesc)
+{
+    arm_compute::FullyConnectedLayerInfo fc_info;
+    fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
+    fc_info.activation_info = ConvertActivationDescriptorToAclActivationLayerInfo(activationDesc);
+    return fc_info;
+}
+
+inline arm_compute::FullyConnectedLayerInfo
+ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc,
+        arm_compute::ActivationLayerInfo activationLayerInfo)
 {
     arm_compute::FullyConnectedLayerInfo fc_info;
     fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
+    fc_info.activation_info = activationLayerInfo;
     return fc_info;
 }
 
diff --git a/src/backends/aclCommon/CMakeLists.txt b/src/backends/aclCommon/CMakeLists.txt
index fa80437f2d..dac663b20c 100644
--- a/src/backends/aclCommon/CMakeLists.txt
+++ b/src/backends/aclCommon/CMakeLists.txt
@@ -7,6 +7,7 @@ list(APPEND armnnAclCommon_sources
     ArmComputeTensorHandle.hpp
     ArmComputeTensorUtils.hpp
     ArmComputeTensorUtils.cpp
+    ArmComputeSubgraphUtils.hpp
     ArmComputeUtils.hpp
     BaseMemoryManager.cpp
     BaseMemoryManager.hpp
author	Mike Kelly <mike.kelly@arm.com>	2020-11-12 10:58:48 +0000
committer	Jim Flynn <jim.flynn@arm.com>	2020-11-13 14:25:30 +0000
commit	07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree	8becef8453674822d079815b06ae37310b97d2cf /src/backends/aclCommon
parent	8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
download	armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz