diff options
author | Mike Kelly <mike.kelly@arm.com> | 2020-11-12 10:58:48 +0000 |
---|---|---|
committer | Jim Flynn <jim.flynn@arm.com> | 2020-11-13 14:25:30 +0000 |
commit | 07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch) | |
tree | 8becef8453674822d079815b06ae37310b97d2cf /src/backends/cl/ClBackend.cpp | |
parent | 8502adeafbbb1db0acefa62560d93453e38dcadb (diff) | |
download | armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz |
IVGCVSW-5328-5329 Fuse Activation
* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads
that support it.
* Changed ProfilingTest network to be a Convolution layer
followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a
ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.
Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
Diffstat (limited to 'src/backends/cl/ClBackend.cpp')
-rw-r--r-- | src/backends/cl/ClBackend.cpp | 263 |
1 file changed, 260 insertions, 3 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index 6254b0a32a..57a5851650 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -12,16 +12,28 @@ #include "ClTensorHandleFactory.hpp" #include <armnn/BackendRegistry.hpp> +#include <armnn/Descriptors.hpp> +#include <aclCommon/ArmComputeSubgraphUtils.hpp> +#include <aclCommon/ArmComputeUtils.hpp> #include <aclCommon/BaseMemoryManager.hpp> #include <armnn/backends/IBackendContext.hpp> #include <armnn/backends/IMemoryManager.hpp> - #include <armnn/utility/PolymorphicDowncast.hpp> +#include "workloads/ClAdditionWorkload.hpp" +#include "workloads/ClBatchNormalizationFloatWorkload.hpp" +#include "workloads/ClConvolution2dWorkload.hpp" +#include "workloads/ClDepthwiseConvolutionWorkload.hpp" +#include "workloads/ClDivisionFloatWorkload.hpp" +#include "workloads/ClFullyConnectedWorkload.hpp" +#include "workloads/ClMultiplicationWorkload.hpp" +#include "workloads/ClSubtractionWorkload.hpp" + #include <Optimizer.hpp> +#include <arm_compute/core/Types.h> #include <arm_compute/runtime/CL/CLBufferAllocator.h> namespace armnn @@ -129,11 +141,256 @@ IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelO return layerSupport; } -OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const +OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, + const ModelOptions& modelOptions) const { OptimizationViews optimizationViews; - optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); + auto it = subgraph.end(); + bool isFastMathEnabled = false; + +#if defined(ARMCOMPUTECL_ENABLED) + IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions); + + if (modelContextPtr) + { + auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get()); + if (clModelOptions) + { + isFastMathEnabled = clModelOptions->IsFastMathEnabled(); + } + 
} +#endif + + while (it != subgraph.begin()) + { + --it; + Layer& base = **it; + + if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d + || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected + || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication + || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division) + && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr)) + { + for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output) + { + if (output->GetNumConnections() == 1) + { + for (auto&& childInput : output->GetConnections()) + { + if (childInput->GetOwningLayer().GetType() == LayerType::Activation) + { + Layer& child = childInput->GetOwningLayer(); + + auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child); + + const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") + + base.GetName(); + + // Get params from activation layer + ActivationDescriptor activationDesc = activationLayer->GetParameters(); + + if (base.GetType() == LayerType::Convolution2d) + { + Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base); + + Optional<TensorInfo> biases; + + if (baseLayer->GetParameters().m_BiasEnabled) + { + biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(), + GetOptionalBiasTypeFromWeightsType( + baseLayer->m_Weight->GetTensorInfo().GetDataType())); + } + + arm_compute::Status status = ClConvolution2dWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetParameters(), + baseLayer->m_Weight->GetTensorInfo(), + biases, + isFastMathEnabled, + &activationDesc); + + if (status) + { + FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews, + baseLayer, + 
activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::DepthwiseConvolution2d) + { + DepthwiseConvolution2dLayer* baseLayer = + PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base); + + Optional<TensorInfo> biases; + + if (baseLayer->GetParameters().m_BiasEnabled) + { + biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(), + GetOptionalBiasTypeFromWeightsType( + baseLayer->m_Weight->GetTensorInfo().GetDataType())); + } + + arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetParameters(), + baseLayer->m_Weight->GetTensorInfo(), + biases, + &activationDesc); + + if (status) + { + FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::FullyConnected) + { + FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base); + + arm_compute::Status status = ClFullyConnectedWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->m_Weight->GetTensorInfo(), + baseLayer->m_Bias->GetTensorInfo(), + baseLayer->GetParameters(), + &activationDesc); + + if (status) + { + FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::BatchNormalization) + { + BatchNormalizationLayer* baseLayer = + PolymorphicDowncast<BatchNormalizationLayer*>(&base); + + arm_compute::Status status = ClBatchNormalizationValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + 
baseLayer->m_Mean->GetTensorInfo(), + baseLayer->m_Variance->GetTensorInfo(), + baseLayer->m_Beta->GetTensorInfo(), + baseLayer->m_Gamma->GetTensorInfo(), + baseLayer->GetParameters(), + &activationDesc); + + if (status) + { + BatchNormalizationLayer* replacementLayer = + FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + + replacementLayer->m_Beta = std::move(baseLayer->m_Beta); + replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma); + replacementLayer->m_Mean = std::move(baseLayer->m_Mean); + replacementLayer->m_Variance = std::move(baseLayer->m_Variance); + } + } + else if (base.GetType() == LayerType::Addition) + { + AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base); + + arm_compute::Status status = ClAdditionValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<AdditionLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::Division) + { + DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base); + + arm_compute::Status status = ClDivisionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<DivisionLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::Multiplication) + { + MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base); + + arm_compute::Status status = 
ClMultiplicationWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + else if (base.GetType() == LayerType::Subtraction) + { + SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base); + + arm_compute::Status status = ClSubtractionValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + name); + } + } + } + } + } + } + } + } + // end each optimization + if (optimizationViews.GetSubstitutions().empty()) + { + optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); + } return optimizationViews; } |