about | summary | refs | log | tree | commit | diff
path: root/src/backends/cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- src/backends/cl/ClBackend.cpp 9
-rw-r--r-- src/backends/cl/ClBackend.hpp 2
-rw-r--r-- src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp 19
-rw-r--r-- src/backends/cl/workloads/ClFullyConnectedWorkload.cpp 61
-rw-r--r-- src/backends/cl/workloads/ClFullyConnectedWorkload.hpp 5
5 files changed, 53 insertions, 43 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index ed6f221511..47990d87dc 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -398,18 +398,19 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
else if (base.GetType() == LayerType::FullyConnected)
{
FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
+ FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
+ // Bias is optional, so only read its TensorInfo from input slot 2 when bias is enabled.
Optional<TensorInfo> biases;
-
- if (baseLayer->GetParameters().m_BiasEnabled)
+ if (descriptor.m_BiasEnabled)
{
- biases = baseLayer->m_Bias->GetTensorInfo();
+ biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
}
arm_compute::Status status = ClFullyConnectedWorkloadValidate(
baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
- baseLayer->m_Weight->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
biases,
baseLayer->GetParameters(),
&activationDesc);
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 99fe9069ff..ce56c3025c 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -27,7 +27,7 @@ const BackendCapabilities gpuAccCapabilities("GpuAcc",
{"NonConstWeights", false},
{"AsyncExecution", false},
{"ProtectedContentAllocation", true},
- {"ConstantTensorsAsInputs", false},
+ {"ConstantTensorsAsInputs", true},
{"PreImportIOTensors", false},
{"ExternallyManagedMemory", true},
{"MultiAxisPacking", false},
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9a4cad3ef0..3a972d3f39 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -30,6 +30,15 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
const Optional<TensorInfo>& biases,
const ActivationDescriptor* activationDescriptor)
{
+ // The CL implemented workload does support both const and non-const
+ // weights. However, in the case of non-const weights we'd have to call
+ // prepare or configure for each inference, which we're not set up to do just yet.
+ if (!weights.IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "ArmNN ClDepthwiseConv2dWorkload does not support non constant weights."};
+ }
+
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -47,14 +56,22 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
// Convert the weights into the compute library format
- const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+ arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+ aclWeightsInfo.set_are_values_constant(weights.IsConstant());
arm_compute::TensorInfo aclBiasesInfo;
arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
if (descriptor.m_BiasEnabled)
{
ARMNN_ASSERT(biases.has_value());
+ // The same applies to the bias as to the weights: non-constant bias is not currently supported.
+ if (!biases.value().IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
+ }
aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+ aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
optionalAclBiasesInfo = &aclBiasesInfo;
}
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index 017f4fff6b..c2da5f297a 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -23,22 +23,37 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
const FullyConnectedDescriptor& descriptor,
const ActivationDescriptor* activationDescriptor)
{
+ // The CL implemented workload does support both const and non-const
+ // weights. However, in the case of non-const weights we'd have to call
+ // prepare or configure for each inference, which we're not set up to do just yet.
+ if (!weights.IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Arm NN ClFullyConnectedWorkload does not support non constant weights."};
+ }
const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
- const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+ arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+ aclWeights.set_are_values_constant(weights.IsConstant());
arm_compute::TensorInfo aclBiases;
arm_compute::TensorInfo* optionalAclBiases = nullptr;
if (descriptor.m_BiasEnabled)
{
ARMNN_ASSERT(biases.has_value());
+ // The same applies to the bias as to the weights: non-constant bias is not currently supported.
+ if (!biases.value().IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Arm NN ClFullyConnectedWorkload does not support non constant bias."};
+ }
aclBiases = BuildArmComputeTensorInfo(biases.value());
+ aclBiases.set_are_values_constant(biases.value().IsConstant());
optionalAclBiases = &aclBiases;
}
const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
-
return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
&aclWeights,
optionalAclBiases,
@@ -70,46 +85,34 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
detailsInfo,
this->GetGuid());
- m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
- BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+ m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", descriptor.m_Parameters.GetNumInputs(),
+ 1);
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ arm_compute::ICLTensor& weights = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+
+ arm_compute::ICLTensor* bias = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- m_BiasesTensor = std::make_unique<arm_compute::CLTensor>();
- BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+ bias = &PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
}
- m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
-
- arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
- arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
arm_compute::FullyConnectedLayerInfo fc_info =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters,
+ activationInfo);
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFullyConnectedWorkload_configure");
m_FullyConnectedLayer.configure(clCompileContext,
&input,
- m_WeightsTensor.get(),
- m_BiasesTensor.get(),
+ &weights,
+ bias,
&output,
fc_info);
}
-
- InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
-
- if (m_BiasesTensor)
- {
- InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias);
- }
-
- // Force Compute Library to perform the necessary copying and reshaping, after which
- // delete all the input tensors that will no longer be needed
- m_FullyConnectedLayer.prepare();
- FreeUnusedTensors();
}
void ClFullyConnectedWorkload::Execute() const
@@ -118,10 +121,4 @@ void ClFullyConnectedWorkload::Execute() const
RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION());
}
-void ClFullyConnectedWorkload::FreeUnusedTensors()
-{
- FreeTensorIfUnused(m_WeightsTensor);
- FreeTensorIfUnused(m_BiasesTensor);
-}
-
} //namespace armnn
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
index 3ab9f986a8..214c7fcb8b 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
@@ -35,11 +35,6 @@ public:
private:
mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer;
-
- std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor;
- std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor;
-
- void FreeUnusedTensors();
};
} //namespace armnn