about summary refs log tree commit diff
path: root/src/backends/cl
diff options
context:
space:
mode:
author: Mike Kelly <mike.kelly@arm.com> 2020-11-12 10:58:48 +0000
committer: Jim Flynn <jim.flynn@arm.com> 2020-11-13 14:25:30 +0000
commit: 07810fc2fcdd34db74222d90cc73ef12a88e7b78 (patch)
tree: 8becef8453674822d079815b06ae37310b97d2cf /src/backends/cl
parent: 8502adeafbbb1db0acefa62560d93453e38dcadb (diff)
download: armnn-07810fc2fcdd34db74222d90cc73ef12a88e7b78.tar.gz
IVGCVSW-5328-5329 Fuse Activation
* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads that support it.
* Changed ProfilingTest network to be a Convolution layer followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.

Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- src/backends/cl/ClBackend.cpp 263
-rw-r--r-- src/backends/cl/ClBackend.hpp 3
-rw-r--r-- src/backends/cl/ClLayerSupport.cpp 27
-rw-r--r-- src/backends/cl/workloads/ClAdditionWorkload.cpp 15
-rw-r--r-- src/backends/cl/workloads/ClAdditionWorkload.hpp 3
-rw-r--r-- src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp 22
-rw-r--r-- src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp 3
-rw-r--r-- src/backends/cl/workloads/ClConvolution2dWorkload.cpp 14
-rw-r--r-- src/backends/cl/workloads/ClConvolution2dWorkload.hpp 3
-rw-r--r-- src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp 14
-rw-r--r-- src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp 3
-rw-r--r-- src/backends/cl/workloads/ClDivisionFloatWorkload.cpp 19
-rw-r--r-- src/backends/cl/workloads/ClDivisionFloatWorkload.hpp 3
-rw-r--r-- src/backends/cl/workloads/ClFullyConnectedWorkload.cpp 13
-rw-r--r-- src/backends/cl/workloads/ClFullyConnectedWorkload.hpp 3
-rw-r--r-- src/backends/cl/workloads/ClMultiplicationWorkload.cpp 20
-rw-r--r-- src/backends/cl/workloads/ClMultiplicationWorkload.hpp 3
-rw-r--r-- src/backends/cl/workloads/ClSubtractionWorkload.cpp 16
-rw-r--r-- src/backends/cl/workloads/ClSubtractionWorkload.hpp 3
19 files changed, 397 insertions, 53 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 6254b0a32a..57a5851650 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -12,16 +12,28 @@
#include "ClTensorHandleFactory.hpp"
#include <armnn/BackendRegistry.hpp>
+#include <armnn/Descriptors.hpp>
+#include <aclCommon/ArmComputeSubgraphUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
-
#include <armnn/utility/PolymorphicDowncast.hpp>
+#include "workloads/ClAdditionWorkload.hpp"
+#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
+#include "workloads/ClConvolution2dWorkload.hpp"
+#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
+#include "workloads/ClDivisionFloatWorkload.hpp"
+#include "workloads/ClFullyConnectedWorkload.hpp"
+#include "workloads/ClMultiplicationWorkload.hpp"
+#include "workloads/ClSubtractionWorkload.hpp"
+
#include <Optimizer.hpp>
+#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
namespace armnn
@@ -129,11 +141,256 @@ IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelO
return layerSupport;
}
-OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
+OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
+ const ModelOptions& modelOptions) const
{
OptimizationViews optimizationViews;
- optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ auto it = subgraph.end();
+ bool isFastMathEnabled = false;
+
+#if defined(ARMCOMPUTECL_ENABLED)
+ IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);
+
+ if (modelContextPtr)
+ {
+ auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
+ if (clModelOptions)
+ {
+ isFastMathEnabled = clModelOptions->IsFastMathEnabled();
+ }
+ }
+#endif
+
+ while (it != subgraph.begin())
+ {
+ --it;
+ Layer& base = **it;
+
+ if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
+ || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
+ || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
+ || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+ && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
+ {
+ for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
+ {
+ if (output->GetNumConnections() == 1)
+ {
+ for (auto&& childInput : output->GetConnections())
+ {
+ if (childInput->GetOwningLayer().GetType() == LayerType::Activation)
+ {
+ Layer& child = childInput->GetOwningLayer();
+
+ auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
+
+ const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
+ base.GetName();
+
+ // Get params from activation layer
+ ActivationDescriptor activationDesc = activationLayer->GetParameters();
+
+ if (base.GetType() == LayerType::Convolution2d)
+ {
+ Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases;
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = ClConvolution2dWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ isFastMathEnabled,
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::DepthwiseConvolution2d)
+ {
+ DepthwiseConvolution2dLayer* baseLayer =
+ PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
+
+ Optional<TensorInfo> biases;
+
+ if (baseLayer->GetParameters().m_BiasEnabled)
+ {
+ biases = GetOverriddenDataType(baseLayer->m_Bias->GetTensorInfo(),
+ GetOptionalBiasTypeFromWeightsType(
+ baseLayer->m_Weight->GetTensorInfo().GetDataType()));
+ }
+
+ arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ biases,
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::FullyConnected)
+ {
+ FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
+
+ arm_compute::Status status = ClFullyConnectedWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Weight->GetTensorInfo(),
+ baseLayer->m_Bias->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::BatchNormalization)
+ {
+ BatchNormalizationLayer* baseLayer =
+ PolymorphicDowncast<BatchNormalizationLayer*>(&base);
+
+ arm_compute::Status status = ClBatchNormalizationValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->m_Mean->GetTensorInfo(),
+ baseLayer->m_Variance->GetTensorInfo(),
+ baseLayer->m_Beta->GetTensorInfo(),
+ baseLayer->m_Gamma->GetTensorInfo(),
+ baseLayer->GetParameters(),
+ &activationDesc);
+
+ if (status)
+ {
+ BatchNormalizationLayer* replacementLayer =
+ FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+
+ replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
+ replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
+ replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
+ replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
+ }
+ }
+ else if (base.GetType() == LayerType::Addition)
+ {
+ AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
+
+ arm_compute::Status status = ClAdditionValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Division)
+ {
+ DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
+
+ arm_compute::Status status = ClDivisionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Multiplication)
+ {
+ MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
+
+ arm_compute::Status status = ClMultiplicationWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ else if (base.GetType() == LayerType::Subtraction)
+ {
+ SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
+
+ arm_compute::Status status = ClSubtractionValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ name);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // end each optimization
+ if (optimizationViews.GetSubstitutions().empty())
+ {
+ optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
+ }
return optimizationViews;
}
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index af5534e0d0..2b19fc5b33 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -44,7 +44,8 @@ public:
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport(const ModelOptions& modelOptions) const override;
- OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph) const override;
+ OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph,
+ const ModelOptions& modelOptions) const override;
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(
const ModelOptions& modelOptions) const override;
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 7c1466e0e1..cce5c9b3bd 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -197,7 +197,8 @@ bool ClLayerSupport::IsAdditionSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool ClLayerSupport::IsArgMinMaxSupported(const TensorInfo& input,
@@ -230,7 +231,8 @@ bool ClLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
var,
beta,
gamma,
- descriptor);
+ descriptor,
+ nullptr);
}
bool ClLayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input,
@@ -357,7 +359,8 @@ bool ClLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
descriptor,
weights,
biases,
- isFastMathEnabled);
+ isFastMathEnabled,
+ nullptr);
}
bool ClLayerSupport::IsDequantizeSupported(const TensorInfo& input,
@@ -395,7 +398,8 @@ bool ClLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
output,
descriptor,
weights,
- biases);
+ biases,
+ nullptr);
}
bool ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input,
@@ -411,7 +415,8 @@ bool ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(const TensorInfo& in
output,
descriptor,
weights,
- biases);
+ biases,
+ nullptr);
}
@@ -424,7 +429,8 @@ bool ClLayerSupport::IsDivisionSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool ClLayerSupport::IsElementwiseUnarySupported(const TensorInfo& input,
@@ -494,7 +500,8 @@ bool ClLayerSupport::IsFullyConnectedSupported(const TensorInfo& input,
output,
weights,
biases,
- descriptor);
+ descriptor,
+ nullptr);
}
bool ClLayerSupport::IsGatherSupported(const TensorInfo& input0,
@@ -639,7 +646,8 @@ bool ClLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool ClLayerSupport::IsNormalizationSupported(const TensorInfo& input,
@@ -911,7 +919,8 @@ bool ClLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
reasonIfUnsupported,
input0,
input1,
- output);
+ output,
+ nullptr);
}
bool ClLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input,
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 18e2400ccd..7e75a04110 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -8,6 +8,7 @@
#include <cl/ClTensorHandle.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include "ClWorkloadUtils.hpp"
@@ -26,7 +27,10 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor
arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo);
}
void ClAdditionWorkload::Execute() const
@@ -37,16 +41,21 @@ void ClAdditionWorkload::Execute() const
arm_compute::Status ClAdditionValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
&aclInput1Info,
&aclOutputInfo,
- g_AclConvertPolicy);
+ g_AclConvertPolicy,
+ activationInfo);
return aclStatus;
}
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp
index 62bd0ae20b..372c4bc6f7 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.hpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp
@@ -25,5 +25,6 @@ private:
arm_compute::Status ClAdditionValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index fa0be85100..68942e2a01 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -4,12 +4,16 @@
//
#include "ClBatchNormalizationFloatWorkload.hpp"
+#include "ClWorkloadUtils.hpp"
+
#include <cl/ClTensorHandle.hpp>
+
#include <backendsCommon/CpuTensorHandle.hpp>
+
#include <aclCommon/ArmComputeTensorUtils.hpp>
-#include <cl/ClLayerSupport.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
-#include "ClWorkloadUtils.hpp"
+#include <cl/ClLayerSupport.hpp>
namespace armnn
{
@@ -21,7 +25,8 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor &desc)
+ const BatchNormalizationDescriptor& desc,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo =
armcomputetensorutils::BuildArmComputeTensorInfo(input, desc.m_DataLayout);
@@ -36,13 +41,17 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
const arm_compute::TensorInfo aclGammaInfo =
armcomputetensorutils::BuildArmComputeTensorInfo(gamma, desc.m_DataLayout);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
&aclOutputInfo,
&aclMeanInfo,
&aclVarInfo,
&aclBetaInfo,
&aclGammaInfo,
- desc.m_Eps);
+ desc.m_Eps,
+ activationInfo);
}
ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
@@ -70,13 +79,16 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
m_Layer.configure(&input,
&output,
m_Mean.get(),
m_Variance.get(),
m_Beta.get(),
m_Gamma.get(),
- m_Data.m_Parameters.m_Eps);
+ m_Data.m_Parameters.m_Eps,
+ activationInfo);
InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean);
InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance);
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
index e94bef20ac..ef5778309e 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
@@ -19,7 +19,8 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor& desc);
+ const BatchNormalizationDescriptor& desc,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 7b52f2784f..50cb9ded37 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -25,7 +25,8 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases,
- bool isFastMathEnabled)
+ bool isFastMathEnabled,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -47,6 +48,9 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
optionalAclBiasesInfo,
@@ -54,7 +58,7 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
layerInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
}
@@ -91,6 +95,8 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
m_ConvolutionLayer.configure(&input,
m_KernelTensor.get(),
m_BiasTensor.get(),
@@ -98,7 +104,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
padStrideInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
m_ConvolutionMethod =
@@ -107,7 +113,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
output.info(),
padStrideInfo,
arm_compute::WeightsInfo(),
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
arm_compute::CLScheduler::get().target(),
aclDilationInfo,
isFastMathEnabled);
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index f769422a0a..70170b569d 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -23,7 +23,8 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases,
- bool isFastMathEnabled = false);
+ bool isFastMathEnabled = false,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 8704b1276f..53f16848eb 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -8,11 +8,13 @@
#include <ResolveType.hpp>
#include "ClWorkloadUtils.hpp"
+#include <armnn/Exceptions.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <cl/ClTensorHandle.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadUtils.hpp>
+#include <backendsCommon/WorkloadData.hpp>
#include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h>
@@ -25,7 +27,8 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
- const Optional<TensorInfo>& biases)
+ const Optional<TensorInfo>& biases,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -56,13 +59,16 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
descriptor.m_DilationX,
descriptor.m_DilationY);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
optionalAclBiasesInfo,
&aclOutputInfo,
aclPadStrideInfo,
aclDepthMultiplier,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
aclDilationInfo);
}
@@ -114,6 +120,8 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
&input,
@@ -122,7 +130,7 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
&output,
padStrideInfo,
depthMultiplier,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
aclDilationInfo);
ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
index fc277b9947..c75913737d 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
@@ -18,7 +18,8 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
- const Optional<TensorInfo>& biases);
+ const Optional<TensorInfo>& biases,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class ClDepthwiseConvolutionWorkload : public BaseWorkload<DepthwiseConvolution2dQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
index 2a27f8a9bc..c79e55ebdd 100644
--- a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
@@ -4,9 +4,12 @@
//
#include "ClDivisionFloatWorkload.hpp"
-#include <cl/ClTensorHandle.hpp>
+
+#include <aclCommon/ArmComputeUtils.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
+#include <cl/ClTensorHandle.hpp>
+
#include "ClWorkloadUtils.hpp"
namespace armnn
@@ -14,13 +17,17 @@ namespace armnn
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
- return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
+ return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput, activationInfo);
}
@@ -33,8 +40,10 @@ ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor&
arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- // Construct
- m_ArithmeticDivision.configure(&input0, &input1, &output);
+
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ m_ArithmeticDivision.configure(&input0, &input1, &output, activationInfo);
}
void ClDivisionFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
index ddca87d78a..71d27ed5b5 100644
--- a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
@@ -14,7 +14,8 @@ namespace armnn
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index 60eb138b42..eaec639f28 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -20,7 +20,8 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
const TensorInfo& biases,
- const FullyConnectedDescriptor& descriptor)
+ const FullyConnectedDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
@@ -35,7 +36,7 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
}
const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
&aclWeights,
@@ -63,9 +64,11 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDesc
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- // Construct
- arm_compute::FullyConnectedLayerInfo fc_info;
- fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ arm_compute::FullyConnectedLayerInfo fc_info =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+
m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
index e13436eaa5..311b59498b 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
@@ -19,7 +19,8 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
const TensorInfo& biases,
- const FullyConnectedDescriptor& descriptor);
+ const FullyConnectedDescriptor& descriptor,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class ClFullyConnectedWorkload : public armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index e9b75c3f10..46a1c4bc59 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -4,8 +4,12 @@
//
#include "ClMultiplicationWorkload.hpp"
-#include <cl/ClTensorHandle.hpp>
+
+#include <aclCommon/ArmComputeUtils.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <cl/ClTensorHandle.hpp>
+
#include "ClWorkloadUtils.hpp"
namespace armnn
@@ -13,7 +17,8 @@ namespace armnn
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
@@ -23,6 +28,9 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
arm_compute::ConvertPolicy::SATURATE :
arm_compute::ConvertPolicy::WRAP;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
// At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
// when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
// ignored for F32 tensors.
@@ -31,7 +39,8 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
&aclOutput,
1.0f,
convertPolicy,
- arm_compute::RoundingPolicy::TO_ZERO);
+ arm_compute::RoundingPolicy::TO_ZERO,
+ activationInfo);
}
@@ -50,13 +59,16 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDesc
arm_compute::ConvertPolicy::SATURATE :
arm_compute::ConvertPolicy::WRAP;
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
// Construct
m_PixelWiseMultiplication.configure(&input0,
&input1,
&output,
1.0f,
convertPolicy,
- arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ activationInfo);
}
void ClMultiplicationWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
index 732bb16dcc..461449cc35 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
@@ -14,7 +14,8 @@ namespace armnn
// Review annotation (not part of the recorded patch):
// Declares the validation entry point for the CL multiplication workload. It takes the two input
// tensor infos and the output tensor info and returns an arm_compute::Status.
// The '+' lines append an optional ActivationDescriptor pointer that defaults to nullptr, so calls
// written against the three-argument '-' form still compile unchanged.
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
class ClMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index 38154eb4d7..c9fb556383 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -7,9 +7,11 @@
#include <cl/ClTensorHandle.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include "ClWorkloadUtils.hpp"
// NOTE(review, not part of the recorded patch): the include added below climbs four directories
// with a quoted relative path, unlike the root-relative <...> includes above it. Presumably the
// public header is reachable through the normal include path (e.g. <armnn/ArmNN.hpp>) - confirm,
// and confirm that this translation unit needs the umbrella header at all.
+#include "../../../../include/armnn/ArmNN.hpp"
namespace armnn
{
@@ -26,7 +28,10 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d
arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo);
}
void ClSubtractionWorkload::Execute() const
@@ -37,16 +42,21 @@ void ClSubtractionWorkload::Execute() const
// Review annotation (not part of the recorded patch):
// Validation entry point for the CL subtraction workload. It converts the three armnn TensorInfo
// arguments with BuildArmComputeTensorInfo, then returns the status produced by
// arm_compute::CLArithmeticSubtraction::validate unchanged.
// '-' lines show the pre-change form: three parameters, validate() called with g_AclConvertPolicy only.
// '+' lines show the post-change form: an ActivationDescriptor pointer is accepted, converted with
// ConvertActivationDescriptorToAclActivationLayerInfo, and passed to validate() as an extra argument.
// The header declaration defaults the pointer to nullptr; how the conversion helper treats a null
// pointer is not visible in this diff - TODO confirm it yields a disabled activation.
arm_compute::Status ClSubtractionValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
&aclInput1Info,
&aclOutputInfo,
- g_AclConvertPolicy);
+ g_AclConvertPolicy,
+ activationInfo);
return aclStatus;
}
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
index da6d17c6ac..9f51de645b 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.hpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
@@ -25,5 +25,6 @@ private:
// Review annotation (not part of the recorded patch):
// Declares the validation entry point for the CL subtraction workload. It takes the two input
// tensor infos and the output tensor info and returns an arm_compute::Status.
// The '+' lines append an optional ActivationDescriptor pointer that defaults to nullptr, so calls
// written against the three-argument '-' form still compile unchanged.
arm_compute::Status ClSubtractionValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output);
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor = nullptr);
} //namespace armnn