author    Mike Kelly <mike.kelly@arm.com>    2020-11-12 10:58:48 +0000
committer Jim Flynn <jim.flynn@arm.com>     2020-11-13 14:25:30 +0000
commit    07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree      8becef8453674822d079815b06ae37310b97d2cf /src/backends/cl/ClBackend.cpp
parent    8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
download  armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz
IVGCVSW-5328-5329 Fuse Activation
* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads that support it.
* Changed ProfilingTest network to be a Convolution layer followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.

Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
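For context, the optimization below walks the subgraph backwards looking for a supported base layer (Convolution2d, DepthwiseConvolution2d, FullyConnected, BatchNormalization, Addition, Subtraction, Multiplication or Division) whose output feeds exactly one Activation layer, validates the fused pair against the corresponding CL workload, and substitutes a single layer carrying the ActivationDescriptor. A minimal sketch of a network that would trigger the Convolution2d case is shown here; the builder name, shapes and zeroed weights are illustrative placeholders, not part of this change:

#include <armnn/ArmNN.hpp>

#include <vector>

// Hypothetical helper: builds a Conv2d -> ReLU pair that the CL backend
// can fuse into a single convolution workload during optimization.
armnn::INetworkPtr BuildFusableNetwork()
{
    using namespace armnn;

    INetworkPtr network = INetwork::Create();

    // Placeholder shapes and zeroed weights, purely for illustration.
    TensorInfo inputInfo({1, 16, 16, 3}, DataType::Float32);
    TensorInfo outputInfo({1, 16, 16, 4}, DataType::Float32);
    TensorInfo weightsInfo({4, 3, 3, 3}, DataType::Float32); // [O, H, W, I] for NHWC
    std::vector<float> weightData(weightsInfo.GetNumElements(), 0.0f);
    ConstTensor weights(weightsInfo, weightData);

    Convolution2dDescriptor convDesc;
    convDesc.m_StrideX = 1;
    convDesc.m_StrideY = 1;
    convDesc.m_PadLeft = convDesc.m_PadRight  = 1;
    convDesc.m_PadTop  = convDesc.m_PadBottom = 1;
    convDesc.m_DataLayout = DataLayout::NHWC;

    ActivationDescriptor reluDesc;
    reluDesc.m_Function = ActivationFunction::ReLu;

    IConnectableLayer* input = network->AddInputLayer(0);
    IConnectableLayer* conv  = network->AddConvolution2dLayer(convDesc, weights, EmptyOptional(), "conv");
    // Sole consumer of the convolution's output: this is what the
    // GetNumConnections() == 1 check in OptimizeSubgraphView looks for.
    IConnectableLayer* relu   = network->AddActivationLayer(reluDesc, "relu");
    IConnectableLayer* output = network->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(conv->GetInputSlot(0));
    conv->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
    relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(inputInfo);
    conv->GetOutputSlot(0).SetTensorInfo(outputInfo);
    relu->GetOutputSlot(0).SetTensorInfo(outputInfo);

    return network;
}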
Diffstat (limited to 'src/backends/cl/ClBackend.cpp')
-rw-r--r-- | src/backends/cl/ClBackend.cpp | 263
1 file changed, 260 insertions(+), 3 deletions(-)
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 6254b0a32a..57a5851650 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -12,16 +12,28 @@
#include "ClTensorHandleFactory.hpp"
#include <armnn/BackendRegistry.hpp>
+#include <armnn/Descriptors.hpp>
+#include <aclCommon/ArmComputeSubgraphUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
-
#include <armnn/utility/PolymorphicDowncast.hpp>
+#include "workloads/ClAdditionWorkload.hpp"
+#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
+#include "workloads/ClConvolution2dWorkload.hpp"
+#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
+#include "workloads/ClDivisionFloatWorkload.hpp"
+#include "workloads/ClFullyConnectedWorkload.hpp"
+#include "workloads/ClMultiplicationWorkload.hpp"
+#include "workloads/ClSubtractionWorkload.hpp"
+
#include <Optimizer.hpp>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
namespace armnn
@@ -129,11 +141,256 @@ IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelO
return layerSupport;
}
-OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
+OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
+ const ModelOptions& modelOptions) const
{
OptimizationViews optimizationViews;
- optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ auto it = subgraph.end();
+ bool isFastMathEnabled = false;
+
+#if defined(ARMCOMPUTECL_ENABLED)
+ IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);
+
+ if (modelContextPtr)
+ {
+ auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
+ if (clModelOptions)
+ {
+ isFastMathEnabled = clModelOptions->IsFastMathEnabled();
+ }
+ }
+#endif
+
+ while (it != subgraph.begin())
+ {
+ --it;
+ Layer& base = **it;
+
+ if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
+ || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
+ || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
+ || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+ && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
+ {
+ for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
+ {
+ if (output->GetNumConnections() == 1)
+ {
+ for (auto&& childInput : output->GetConnections())
+ {
+ if (childInput->GetOwningLayer().GetType() == LayerType::Activation)
+ {
+ Layer& child = childInput->GetOwningLayer();
+
+ auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
+
+ const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
+ base.GetName();
+
+ // Get params from activation layer
+ ActivationDescriptor activationDesc = activationLayer->GetParameters();
+
+ if (base.GetType() == LayerType::Convolution2d)
+ {
+ Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases;
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = ClConvolution2dWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ isFastMathEnabled,
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::DepthwiseConvolution2d)
+ {
+ DepthwiseConvolution2dLayer* baseLayer =
+ PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases;
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::FullyConnected)
+ {
+ FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
+
+ arm_compute::Status status = ClFullyConnectedWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ baseLayer->m_Bias->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::BatchNormalization)
+ {
+ BatchNormalizationLayer* baseLayer =
+ PolymorphicDowncast<BatchNormalizationLayer*>(&base);
+
+ arm_compute::Status status = ClBatchNormalizationValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Mean->GetTensorInfo(),
+ baseLayer->m_Variance->GetTensorInfo(),
+ baseLayer->m_Beta->GetTensorInfo(),
+ baseLayer->m_Gamma->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ BatchNormalizationLayer* replacementLayer =
+ FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+
+ replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
+ replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
+ replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
+ replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
+ }
+ }
+ else if (base.GetType() == LayerType::Addition)
+ {
+ AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
+
+ arm_compute::Status status = ClAdditionValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Division)
+ {
+ DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
+
+ arm_compute::Status status = ClDivisionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Multiplication)
+ {
+ MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
+
+ arm_compute::Status status = ClMultiplicationWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Subtraction)
+ {
+ SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
+
+ arm_compute::Status status = ClSubtractionValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // end each optimization
+ if (optimizationViews.GetSubstitutions().empty())
+ {
+ optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ }
return optimizationViews;
}
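Since the new overload threads ModelOptions through from the network, a caller can enable the fast math path checked above via a GpuAcc BackendOptions entry. A hedged sketch, assuming the OptimizerOptions::m_ModelOptions plumbing that accompanies this change and reusing the hypothetical BuildFusableNetwork helper from earlier:

// Sketch: enable FastMathEnabled for GpuAcc so isFastMathEnabled above is
// true when ClBackend::OptimizeSubgraphView validates Convolution2d fusions.
armnn::IRuntime::CreationOptions runtimeOptions;
armnn::IRuntimePtr runtime = armnn::IRuntime::Create(runtimeOptions);

armnn::OptimizerOptions optimizerOptions;
optimizerOptions.m_ModelOptions.push_back(
    armnn::BackendOptions("GpuAcc", {{"FastMathEnabled", true}}));

armnn::INetworkPtr network = BuildFusableNetwork(); // hypothetical helper from above
armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
    *network, {armnn::Compute::GpuAcc}, runtime->GetDeviceSpec(), optimizerOptions);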