From 4452baf3d295164877c5810a3867b1d2d79b04f3 Mon Sep 17 00:00:00 2001
From: Cathal Corbett
Date: Fri, 13 May 2022 09:55:59 +0100
Subject: IVGCVSW-6260 ConstTensorsAsInput: Fully Connected Cl and Neon support.

 * IVGCVSW-6940 ConstTensorsAsInput: DepthwiseConvolution2d - Complete Neon and Cl Bug Fix
 * Bug fix to enable Cl and Neon Backend Compatibility ConstantTensorsAsInputs
 * Updated Cl and Neon FullyConnected workloads to handle constant weights and bias as inputs rather than reading from member variables.
 * Prevent non const weights and biases passing CL and NEON validate for Depthwise Convolution.

Signed-off-by: Cathal Corbett
Change-Id: I0f505ff5998a183152f843d0f6cc74327ba920e7
---
 src/backends/aclCommon/ArmComputeTensorUtils.cpp   |  4 ++
 .../backendsCommon/test/CompatibilityTests.cpp     |  5 +-
 src/backends/cl/ClBackend.cpp                      |  9 ++--
 src/backends/cl/ClBackend.hpp                      |  2 +-
 .../workloads/ClDepthwiseConvolutionWorkload.cpp   | 19 ++++++-
 .../cl/workloads/ClFullyConnectedWorkload.cpp      | 61 ++++++++++-----------
 .../cl/workloads/ClFullyConnectedWorkload.hpp      |  5 --
 src/backends/neon/NeonBackend.cpp                  | 10 ++--
 src/backends/neon/NeonBackend.hpp                  |  2 +-
 src/backends/neon/test/NeonLayerTests.cpp          |  6 +--
 .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 19 ++++++-
 .../neon/workloads/NeonFullyConnectedWorkload.cpp  | 62 +++++++++-------------
 .../neon/workloads/NeonFullyConnectedWorkload.hpp  |  2 -
 13 files changed, 113 insertions(+), 93 deletions(-)

diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index 1960332ccf..01c071d43b 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -147,6 +147,10 @@ arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& te
 
 // Utility function used to build a TensorInfo object, that can be used to initialise
 // ARM Compute Tensor and CLTensor allocators.
+// Note: this utility ignores the value of armnn::TensorInfo.IsConstant(). ACL tensors
+// default to constant but Arm NN ones default to non constant. In the cases where
+// we expect ACL to treat a tensor as constant that value must be set after this
+// utility has been called.
 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo)
 {
     bool multiScales = tensorInfo.HasMultipleQuantizationScales();
diff --git a/src/backends/backendsCommon/test/CompatibilityTests.cpp b/src/backends/backendsCommon/test/CompatibilityTests.cpp
index 3685f75986..c69a4b5f91 100644
--- a/src/backends/backendsCommon/test/CompatibilityTests.cpp
+++ b/src/backends/backendsCommon/test/CompatibilityTests.cpp
@@ -131,7 +131,6 @@ TEST_SUITE("BackendCapability")
 
 namespace
 {
-
 #if defined(ARMNNREF_ENABLED) || defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
 void CapabilityTestHelper(BackendCapabilities &capabilities,
                           std::vector<std::pair<std::string, bool>> capabilityVector)
@@ -198,7 +197,7 @@ TEST_CASE ("Neon_Backends_Capability_Test")
                          {{"NonConstWeights", false},
                           {"AsyncExecution", false},
                           {"ProtectedContentAllocation", false},
-                          {"ConstantTensorsAsInputs", false},
+                          {"ConstantTensorsAsInputs", true},
                           {"PreImportIOTensors", false},
                           {"ExternallyManagedMemory", true},
                           {"MultiAxisPacking", false}});
@@ -217,7 +216,7 @@ TEST_CASE ("Cl_Backends_Capability_Test")
                          {{"NonConstWeights", false},
                           {"AsyncExecution", false},
                           {"ProtectedContentAllocation", true},
-                          {"ConstantTensorsAsInputs", false},
+                          {"ConstantTensorsAsInputs", true},
                           {"PreImportIOTensors", false},
                           {"ExternallyManagedMemory", true},
                           {"MultiAxisPacking", false}});
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index ed6f221511..47990d87dc 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -398,18 +398,19 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
         else if (base.GetType() == LayerType::FullyConnected)
         {
             FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
+            FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
 
+            // As bias is optional only try to get TensorInfo from input if bias is enabled.
             Optional<TensorInfo> biases;
-
-            if (baseLayer->GetParameters().m_BiasEnabled)
+            if (descriptor.m_BiasEnabled)
             {
-                biases = baseLayer->m_Bias->GetTensorInfo();
+                biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
             }
 
             arm_compute::Status status = ClFullyConnectedWorkloadValidate(
                 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
-                baseLayer->m_Weight->GetTensorInfo(),
+                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                 biases,
                 baseLayer->GetParameters(),
                 &activationDesc);
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 99fe9069ff..ce56c3025c 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -27,7 +27,7 @@ const BackendCapabilities gpuAccCapabilities("GpuAcc",
                                              {"NonConstWeights", false},
                                              {"AsyncExecution", false},
                                              {"ProtectedContentAllocation", true},
-                                             {"ConstantTensorsAsInputs", false},
+                                             {"ConstantTensorsAsInputs", true},
                                              {"PreImportIOTensors", false},
                                              {"ExternallyManagedMemory", true},
                                              {"MultiAxisPacking", false},
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9a4cad3ef0..3a972d3f39 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -30,6 +30,15 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
                                                            const Optional<TensorInfo>& biases,
                                                            const ActivationDescriptor* activationDescriptor)
 {
+    // The CL implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "ArmNN ClDepthwiseConv2dWorkload does not support non constant weights."};
+    }
+
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
 
@@ -47,14 +56,22 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
 
     // Convert the weights into the compute library format
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
 
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
+        }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index 017f4fff6b..c2da5f297a 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -23,22 +23,37 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                      const FullyConnectedDescriptor& descriptor,
                                                      const ActivationDescriptor* activationDescriptor)
 {
+    // The CL implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "Arm NN ClFullyConnectedWorkload does not support non constant weights."};
+    }
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
-    const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    aclWeights.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiases;
     arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "Arm NN ClFullyConnectedWorkload does not support non constant bias."};
+        }
         aclBiases = BuildArmComputeTensorInfo(biases.value());
+        aclBiases.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiases = &aclBiases;
     }
 
     const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
-
     return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
                                                         &aclWeights,
                                                         optionalAclBiases,
@@ -70,46 +85,34 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
                              detailsInfo,
                              this->GetGuid());
 
-    m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
-    BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+    m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", descriptor.m_Parameters.GetNumInputs(),
+                                 1);
 
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& weights = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+
+    arm_compute::ICLTensor* bias = nullptr;
     if (m_Data.m_Parameters.m_BiasEnabled)
     {
-        m_BiasesTensor = std::make_unique<arm_compute::CLTensor>();
-        BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+        bias = &PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
     }
 
-    m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
-
-    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
 
     arm_compute::FullyConnectedLayerInfo fc_info =
-        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters,
+                                                                    activationInfo);
 
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFullyConnectedWorkload_configure");
         m_FullyConnectedLayer.configure(clCompileContext,
                                         &input,
-                                        m_WeightsTensor.get(),
-                                        m_BiasesTensor.get(),
+                                        &weights,
+                                        bias,
                                         &output,
                                         fc_info);
     }
-
-    InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
-
-    if (m_BiasesTensor)
-    {
-        InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias);
-    }
-
-    // Force Compute Library to perform the necessary copying and reshaping, after which
-    // delete all the input tensors that will no longer be needed
-    m_FullyConnectedLayer.prepare();
-    FreeUnusedTensors();
 }
 
 void ClFullyConnectedWorkload::Execute() const
@@ -118,10 +121,4 @@ void ClFullyConnectedWorkload::Execute() const
     RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION());
 }
 
-void ClFullyConnectedWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_WeightsTensor);
-    FreeTensorIfUnused(m_BiasesTensor);
-}
-
 } //namespace armnn
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
index 3ab9f986a8..214c7fcb8b 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
@@ -35,11 +35,6 @@ public:
 
 private:
     mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer;
-
-    std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor;
-    std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor;
-
-    void FreeUnusedTensors();
 };
 
 } //namespace armnn
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 7a258c38e0..39ad4b9f32 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -250,17 +250,19 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
         else if (base.GetType() == LayerType::FullyConnected)
         {
             FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
-            Optional<TensorInfo> biases;
+            FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
 
-            if (baseLayer->GetParameters().m_BiasEnabled)
+            // As bias is optional only try to get TensorInfo from input if bias is enabled.
+            Optional<TensorInfo> biases;
+            if (descriptor.m_BiasEnabled)
             {
-                biases = baseLayer->m_Bias->GetTensorInfo();
+                biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
             }
 
             arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
-                baseLayer->m_Weight->GetTensorInfo(),
+                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                 biases,
                 baseLayer->GetParameters(),
                 &activationDesc);
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index e53bacb84a..e3e3782a7f 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -15,7 +15,7 @@ const BackendCapabilities cpuAccCapabilities("CpuAcc",
                                              {"NonConstWeights", false},
                                              {"AsyncExecution", false},
                                              {"ProtectedContentAllocation", false},
-                                             {"ConstantTensorsAsInputs", false},
+                                             {"ConstantTensorsAsInputs", true},
                                              {"PreImportIOTensors", false},
                                              {"ExternallyManagedMemory", true},
                                              {"MultiAxisPacking", false},
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index d71ce1f2bf..b813c33763 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -373,7 +373,7 @@ TEST_CASE("DepthwiseConv2dUtils")
     TensorInfo inputInfo({1, 1, 10, 10 }, dataType);
     TensorInfo outputInfo;
-    TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType); // [1,H,W,I*M]
+    TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType, 0, 0, true); // [1,H,W,I*M]
     TensorInfo biasesInfo;
 
     DepthwiseConvolution2dDescriptor descriptor;
@@ -432,14 +432,14 @@ TEST_CASE("DepthwiseConv2dUtils")
                                                        weightsInfo3x3, biasesInfo));
 
     // Supported weights shape 1x1
-    TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, DataType::Float32);
+    TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, DataType::Float32, 0, 0, true);
     descriptor = MakeDepthwiseConv2dDesc(1, 1);
     outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo1x1, descriptor, dataType);
     CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor,
                                                        weightsInfo1x1, biasesInfo));
 
     // Supported shape 2x2
-    TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32);
+    TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32, 0, 0, true);
     descriptor = MakeDepthwiseConv2dDesc(1, 1);
     outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType);
     CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor,
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index b122be62ce..9eeac6e2a3 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -33,6 +33,15 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
                                                              const Optional<TensorInfo>& biases,
                                                              const ActivationDescriptor* activationDescriptor)
 {
+    // The Neon implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "ArmNN NeonDepthwiseConv2dWorkload does not support non constant weights."};
+    }
+
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
 
@@ -50,14 +59,22 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
 
     // Convert the weights into the compute library format
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
 
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "ArmNN NeonDepthwiseConv2dWorkload does not support non constant bias."};
+        }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 26c68b7d1d..d3716806b3 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -28,22 +28,37 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                        const FullyConnectedDescriptor& descriptor,
                                                        const ActivationDescriptor* activationDescriptor)
 {
+    // The NEON implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "Arm NN NeonFullyConnectedWorkload does not support non constant weights."};
+    }
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
-    const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    aclWeights.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiases;
     arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "Arm NN NeonFullyConnectedWorkload does not support non constant bias."};
+        }
         aclBiases = BuildArmComputeTensorInfo(biases.value());
+        aclBiases.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiases = &aclBiases;
     }
 
     const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
-
     return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
                                                         &aclWeights,
                                                         optionalAclBiases,
@@ -61,17 +76,20 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
     arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
+    // Copy the weights' tensor into arm_compute tensor.
     m_WeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
-
+    InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
+
     if (m_Data.m_Parameters.m_BiasEnabled)
     {
+        // Copy the biases tensor into arm_compute tensor.
         m_BiasesTensor = std::make_unique<arm_compute::Tensor>();
-        BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+        BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+        InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
     }
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
-
     arm_compute::FullyConnectedLayerInfo fc_info =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
 
@@ -79,28 +97,6 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
     layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
     m_FullyConnectedLayer.reset(layer.release());
 
-    // Allocate
-    if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QAsymmU8)
-    {
-        InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
-    }
-    else
-    {
-        InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
-    }
-
-    if (m_BiasesTensor)
-    {
-        if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32)
-        {
-            InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
-        }
-        else
-        {
-            InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
-        }
-    }
-
     // Add details for profiling output
     WorkloadInfo detailsInfo;
 
@@ -118,10 +114,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
                              detailsInfo,
                              this->GetGuid());
 
-    // Force Compute Library to perform the necessary copying and reshaping, after which
-    // delete all the input tensors that will no longer be needed
+    // Force Compute Library to perform the necessary copying and reshaping.
     m_FullyConnectedLayer->prepare();
-    FreeUnusedTensors();
+    FreeTensorIfUnused(m_WeightsTensor);
+    FreeTensorIfUnused(m_BiasesTensor);
 }
 
 void NeonFullyConnectedWorkload::Execute() const
@@ -130,10 +126,4 @@ void NeonFullyConnectedWorkload::Execute() const
     m_FullyConnectedLayer->run();
 }
 
-void NeonFullyConnectedWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_WeightsTensor);
-    FreeTensorIfUnused(m_BiasesTensor);
-}
-
 } //namespace armnn
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
index 419a3299f2..944731d7bd 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
@@ -33,11 +33,9 @@ public:
 
 private:
     std::unique_ptr<arm_compute::IFunction> m_FullyConnectedLayer;
-
    std::unique_ptr<arm_compute::Tensor> m_WeightsTensor;
    std::unique_ptr<arm_compute::Tensor> m_BiasesTensor;
 
-    void FreeUnusedTensors();
 };
 
 } //namespace armnn
-- 
cgit v1.2.1
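
The pattern this patch repeats in every Cl and Neon validate function above can be condensed into a short sketch. The code below is illustrative only and not part of the commit: CheckConstTensorsSupported is a hypothetical helper name, while armnn::TensorInfo::IsConstant(), armcomputetensorutils::BuildArmComputeTensorInfo() and arm_compute::TensorInfo::set_are_values_constant() are the real APIs the diff itself uses.

// Illustrative sketch, not part of the commit: a hypothetical helper showing
// the guard-then-mark pattern the patch adds to each workload's validate path.
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <arm_compute/core/Error.h>
#include <arm_compute/core/TensorInfo.h>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>

arm_compute::Status CheckConstTensorsSupported(const armnn::TensorInfo& weights,
                                               const armnn::Optional<armnn::TensorInfo>& biases)
{
    // Guard: ACL itself can handle non constant weights, but Arm NN would have
    // to re-run prepare()/configure() on every inference, which it is not set
    // up to do, so validation fails early and the layer is not assigned to
    // this backend.
    if (!weights.IsConstant())
    {
        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "Workload does not support non constant weights."};
    }
    if (biases.has_value() && !biases.value().IsConstant())
    {
        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "Workload does not support non constant bias."};
    }

    // Mark: BuildArmComputeTensorInfo() ignores armnn::TensorInfo::IsConstant()
    // (see the comment added to ArmComputeTensorUtils.cpp), so constness must
    // be restated on the ACL tensor info before it is passed to the ACL
    // validate()/configure() calls.
    arm_compute::TensorInfo aclWeights =
        armcomputetensorutils::BuildArmComputeTensorInfo(weights);
    aclWeights.set_are_values_constant(weights.IsConstant());
    // aclWeights would now be handed to e.g. CLFullyConnectedLayer::validate().

    return arm_compute::Status{};
}

The explicit set_are_values_constant() call is needed because ACL tensor infos default to constant while Arm NN tensor infos default to non constant; without restating the flag after conversion, the two sides can disagree about whether the weights may change between inferences.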