about summary refs log tree commit diff
path: root/src/backends/neon
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/neon')
-rw-r--r--src/backends/neon/NeonBackend.cpp10
-rw-r--r--src/backends/neon/NeonBackend.hpp2
-rw-r--r--src/backends/neon/test/NeonLayerTests.cpp6
-rw-r--r--src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp19
-rw-r--r--src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp62
-rw-r--r--src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp2
6 files changed, 54 insertions, 47 deletions
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 7a258c38e0..39ad4b9f32 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -250,17 +250,19 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
else if (base.GetType() == LayerType::FullyConnected)
{
FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
- Optional<TensorInfo> biases;
+ FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
- if (baseLayer->GetParameters().m_BiasEnabled)
+ // As bias is optional only try to get TensorInfo from input if bias is enabled.
+ Optional<TensorInfo> biases;
+ if (descriptor.m_BiasEnabled)
{
- biases = baseLayer->m_Bias->GetTensorInfo();
+ biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
}
arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
- baseLayer->m_Weight->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
biases,
baseLayer->GetParameters(),
&activationDesc);
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index e53bacb84a..e3e3782a7f 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -15,7 +15,7 @@ const BackendCapabilities cpuAccCapabilities("CpuAcc",
{"NonConstWeights", false},
{"AsyncExecution", false},
{"ProtectedContentAllocation", false},
- {"ConstantTensorsAsInputs", false},
+ {"ConstantTensorsAsInputs", true},
{"PreImportIOTensors", false},
{"ExternallyManagedMemory", true},
{"MultiAxisPacking", false},
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index d71ce1f2bf..b813c33763 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -373,7 +373,7 @@ TEST_CASE("DepthwiseConv2dUtils")
TensorInfo inputInfo({1, 1, 10, 10 }, dataType);
TensorInfo outputInfo;
- TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType); // [1,H,W,I*M]
+ TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType, 0, 0, true); // [1,H,W,I*M]
TensorInfo biasesInfo;
DepthwiseConvolution2dDescriptor descriptor;
@@ -432,14 +432,14 @@ TEST_CASE("DepthwiseConv2dUtils")
weightsInfo3x3, biasesInfo));
// Supported weights shape 1x1
- TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, DataType::Float32);
+ TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, DataType::Float32, 0, 0, true);
descriptor = MakeDepthwiseConv2dDesc(1, 1);
outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo1x1, descriptor, dataType);
CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor,
weightsInfo1x1, biasesInfo));
// Supported shape 2x2
- TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32);
+ TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32, 0, 0, true);
descriptor = MakeDepthwiseConv2dDesc(1, 1);
outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType);
CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor,
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index b122be62ce..9eeac6e2a3 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -33,6 +33,15 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const Optional<TensorInfo>& biases,
const ActivationDescriptor* activationDescriptor)
{
+ // The Neon implemented workload does support both const and non const
+ // weights. However, in the case of non const weights we'd have to call
+ // prepare or configure for each inference which we're not setup to do just yet.
+ if (!weights.IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "ArmNN NeonDepthwiseConv2dWorkload does not support non constant weights."};
+ }
+
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -50,14 +59,22 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
// Convert the weights into the compute library format
- const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+ arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+ aclWeightsInfo.set_are_values_constant(weights.IsConstant());
arm_compute::TensorInfo aclBiasesInfo;
arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
if (descriptor.m_BiasEnabled)
{
ARMNN_ASSERT(biases.has_value());
+ // Same for bias as weights. We don't currently support non const.
+ if (!biases.value().IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "ArmNN NeonDepthwiseConv2dWorkload does not support non constant bias."};
+ }
aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+ aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
optionalAclBiasesInfo = &aclBiasesInfo;
}
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 26c68b7d1d..d3716806b3 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -28,22 +28,37 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const FullyConnectedDescriptor& descriptor,
const ActivationDescriptor* activationDescriptor)
{
+ // The NEON implemented workload does support both const and non const
+ // weights. However, in the case of non const weights we'd have to call
+ // prepare or configure for each inference which we're not setup to do just yet.
+ if (!weights.IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Arm NN NeonFullyConnectedWorkload does not support non constant weights."};
+ }
const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
- const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+ arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+ aclWeights.set_are_values_constant(weights.IsConstant());
arm_compute::TensorInfo aclBiases;
arm_compute::TensorInfo* optionalAclBiases = nullptr;
if (descriptor.m_BiasEnabled)
{
ARMNN_ASSERT(biases.has_value());
+ // Same for bias as weights. We don't currently support non const.
+ if (!biases.value().IsConstant())
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Arm NN NeonFullyConnectedWorkload does not support non constant bias."};
+ }
aclBiases = BuildArmComputeTensorInfo(biases.value());
+ aclBiases.set_are_values_constant(biases.value().IsConstant());
optionalAclBiases = &aclBiases;
}
const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
-
return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
&aclWeights,
optionalAclBiases,
@@ -61,17 +76,20 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ // Copy the weights' tensor into arm_compute tensor.
m_WeightsTensor = std::make_unique<arm_compute::Tensor>();
BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
-
+ InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
+
if (m_Data.m_Parameters.m_BiasEnabled)
{
+ // Copy the biases tensor into arm_compute tensor.
m_BiasesTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+ BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+ InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
}
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
-
arm_compute::FullyConnectedLayerInfo fc_info =
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
@@ -79,28 +97,6 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
m_FullyConnectedLayer.reset(layer.release());
- // Allocate
- if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QAsymmU8)
- {
- InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
- }
- else
- {
- InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
- }
-
- if (m_BiasesTensor)
- {
- if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32)
- {
- InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
- }
- else
- {
- InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
- }
- }
-
// Add details for profiling output
WorkloadInfo detailsInfo;
@@ -118,10 +114,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
detailsInfo,
this->GetGuid());
- // Force Compute Library to perform the necessary copying and reshaping, after which
- // delete all the input tensors that will no longer be needed
+ // Force Compute Library to perform the necessary copying and reshaping.
m_FullyConnectedLayer->prepare();
- FreeUnusedTensors();
+ FreeTensorIfUnused(m_WeightsTensor);
+ FreeTensorIfUnused(m_BiasesTensor);
}
void NeonFullyConnectedWorkload::Execute() const
@@ -130,10 +126,4 @@ void NeonFullyConnectedWorkload::Execute() const
m_FullyConnectedLayer->run();
}
-void NeonFullyConnectedWorkload::FreeUnusedTensors()
-{
- FreeTensorIfUnused(m_WeightsTensor);
- FreeTensorIfUnused(m_BiasesTensor);
-}
-
} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
index 419a3299f2..944731d7bd 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
@@ -33,11 +33,9 @@ public:
private:
std::unique_ptr<arm_compute::IFunction> m_FullyConnectedLayer;
-
std::unique_ptr<arm_compute::Tensor> m_WeightsTensor;
std::unique_ptr<arm_compute::Tensor> m_BiasesTensor;
- void FreeUnusedTensors();
};
} //namespace armnn