From f6ae98331f7d08e5ab764ab70a168a523fdd5404 Mon Sep 17 00:00:00 2001
From: Cathal Corbett <cathal.corbett@arm.com>
Date: Thu, 12 May 2022 15:54:58 +0100
Subject: IVGCVSW-6126 ConstTensorsAsInput: Conv2d - Backends

!android-nn-driver:7477

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Ibf633ccccc385bd980934ff829407d21981323ef
---
 src/armnn/Network.cpp                              |  3 ++
 .../test/optimizations/FuseActivationTests.cpp     | 31 ++++++------
 .../cl/workloads/ClConvolution2dWorkload.cpp       | 57 ++++++++--------------
 .../cl/workloads/ClConvolution2dWorkload.hpp       |  5 --
 .../neon/workloads/NeonConvolution2dWorkload.cpp   | 21 ++++----
 .../neon/workloads/NeonConvolution2dWorkload.hpp   |  3 --
 6 files changed, 48 insertions(+), 72 deletions(-)

diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 479e57fc56..77ad5c4dc2 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1832,6 +1832,9 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
+
+        // Once the constants are converted we can now safely call RedirectMembersToConstantInputs
+        Optimizer::Pass(optGraph, MakeOptimizations(RedirectMembersToConstantInputs()));
     }
     return optNet;
 }
diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp
index 0cca86f93b..3b8917192d 100644
--- a/src/armnn/test/optimizations/FuseActivationTests.cpp
+++ b/src/armnn/test/optimizations/FuseActivationTests.cpp
@@ -56,32 +56,35 @@ struct Convolution2dTest
                                               float scale = 1.f,
                                               int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         Convolution2dDescriptor descriptor;
         descriptor.m_DataLayout  = DataLayout::NHWC;
         descriptor.m_StrideX     = 1;
         descriptor.m_StrideY     = 1;
 
+        return network->AddConvolution2dLayer(descriptor, name);
+    }
+
+    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
+                                                             float scale = 1.f,
+                                                             int32_t offset = 0)
+    {
         std::vector<float> weightsData = {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                            11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                            21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-                                           31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
+                                           31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
         std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
         TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
         ConstTensor weights(weightsInfo, weightsVector);
-        Optional<ConstTensor> optionalBias;
-
-        ARMNN_NO_DEPRECATE_WARN_BEGIN
-        return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
-        ARMNN_NO_DEPRECATE_WARN_END
-    }
-
-    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
-                                                             float scale = 1.f,
-                                                             int32_t offset = 0)
-    {
-        IgnoreUnused(network);
-        IgnoreUnused(scale);
-        IgnoreUnused(offset);
-        return {};
+
+        IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
+        weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+        std::vector<IConnectableLayer*> layers = { weightsLayer };
+        return layers;
     }
 };
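The reworked test helpers above capture the new ConstTensorsAsInput graph pattern: weights (and bias, when enabled) enter the graph as ConstantLayers wired to the convolution's extra input slots, rather than being passed directly to AddConvolution2dLayer. A minimal sketch of that pattern, assuming a Float32 NHWC network; the layer names, shape, and values here are illustrative only, not taken from the patch:

    #include <armnn/ArmNN.hpp> // INetwork, TensorInfo, ConstTensor, Convolution2dDescriptor

    using namespace armnn;

    INetworkPtr net = INetwork::Create();

    Convolution2dDescriptor desc;
    desc.m_DataLayout  = DataLayout::NHWC;
    desc.m_StrideX     = 1;
    desc.m_StrideY     = 1;
    desc.m_BiasEnabled = false;

    IConnectableLayer* inputLayer = net->AddInputLayer(0, "Input");
    IConnectableLayer* convLayer  = net->AddConvolution2dLayer(desc, "Conv2d");

    // Hypothetical 1x3x3x3 weights; the trailing 'true' marks the TensorInfo as constant.
    TensorInfo weightsInfo({ 1, 3, 3, 3 }, DataType::Float32, 0.0f, 0, true);
    std::vector<float> weightsData(weightsInfo.GetNumElements(), 1.0f);
    ConstTensor weights(weightsInfo, weightsData);

    IConnectableLayer* weightsLayer = net->AddConstantLayer(weights, "Weights");
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);

    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1)); // slot 2 would carry the bias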
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index e3d679a773..762645bfba 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -28,7 +28,7 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
                                                     bool isFastMathEnabled,
                                                     const ActivationDescriptor* activationDescriptor)
 {
-    // The implemented workload does support both const and non-const
+    // The arm_compute::CLConvolutionLayer supports both const and non-const
     // weights. However, in the case of non-const weights we'd have to call
     // prepare or configure for each inference which we're not set up to do just yet.
     if (!weights.IsConstant())
@@ -39,7 +39,8 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
 
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
                                                                       descriptor.m_DilationY);
@@ -57,6 +58,7 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
                                        "ArmNN ClConvolution2dWorkload does not support non constant bias."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
 
@@ -85,31 +87,31 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
     , m_ConvolutionLayer(memoryManager)
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload");
-    const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
-    m_Data.ValidateInputsOutputs("ClConvolution2dWorkload", 1, 1);
-
-    m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
-    BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
 
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(m_Data.m_Parameters.m_DilationX,
                                                                       m_Data.m_Parameters.m_DilationY);
 
-    if (m_Data.m_Parameters.m_BiasEnabled)
-    {
-        m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
-        BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
-    }
+    uint32_t numInputs = m_Data.m_Parameters.m_BiasEnabled ? 3 : 2;
+    m_Data.ValidateInputsOutputs("ClConvolution2dWorkload", numInputs, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& weights = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor* bias = nullptr;
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        bias = &static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
+    }
 
     // Create Proxy tensor and set the initial tensor handle to it
     m_InputProxy  = std::make_unique<ICLTensorProxy>(&input);
     m_OutputProxy = std::make_unique<ICLTensorProxy>(&output);
+
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
+    weights.info()->set_data_layout(aclDataLayout);
 
     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
 
@@ -119,8 +121,8 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload_configure");
         m_ConvolutionLayer.configure(clCompileContext,
                                      m_InputProxy.get(),
-                                     m_KernelTensor.get(),
-                                     m_BiasTensor.get(),
+                                     &weights,
+                                     bias,
                                      m_OutputProxy.get(),
                                      padStrideInfo,
                                      arm_compute::WeightsInfo(),
@@ -131,7 +133,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
 
     m_ConvolutionMethod =
         m_ConvolutionLayer.get_convolution_method(input.info(),
-                                                  m_KernelTensor->info(),
+                                                  weights.info(),
                                                   output.info(),
                                                   padStrideInfo,
                                                   arm_compute::WeightsInfo(),
@@ -156,27 +158,12 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
     ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Construct",
                                          descriptor.m_Parameters,
                                          detailsInfo,
-                                         this->GetGuid());
-
-    InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
-
-    if (m_BiasTensor)
-    {
-        InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
-    }
-
-    // Force Compute Library to perform the necessary copying and reshaping, after which
-    // delete all the input tensors that will no longer be needed
-    {
-        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload_prepare");
-        m_ConvolutionLayer.prepare();
-    }
-    FreeUnusedTensors();
+                                         GetGuid());
 }
 
 void ClConvolution2dWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvolution2dWorkload_Execute", this->GetGuid());
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvolution2dWorkload_Execute", GetGuid());
     RunClFunction(m_ConvolutionLayer, CHECK_LOCATION());
 }
 
@@ -185,12 +172,6 @@ arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() c
     return m_ConvolutionMethod;
 }
 
-void ClConvolution2dWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_KernelTensor);
-    FreeTensorIfUnused(m_BiasTensor);
-}
-
 void ClConvolution2dWorkload::Reconfigure()
 {
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
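Because weights and bias now arrive through the queue descriptor's input handles, the workload validates two or three inputs instead of one. A hedged sketch of the input-slot layout the updated constructor expects; the handle variables are hypothetical armnn::ITensorHandle pointers and are not part of the patch:

    // m_Inputs[0] = activation input
    // m_Inputs[1] = weights
    // m_Inputs[2] = bias (present only when m_BiasEnabled)
    armnn::Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters.m_BiasEnabled = true;
    queueDescriptor.m_Inputs  = { inputHandle, weightsHandle, biasHandle }; // 3 with bias, else 2
    queueDescriptor.m_Outputs = { outputHandle };
    // ValidateInputsOutputs("ClConvolution2dWorkload", numInputs, 1) now checks this count.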
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index bba92d2ad0..7293c830ac 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -60,13 +60,8 @@ protected:
 private:
     mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
 
-    std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
-    std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
-
     arm_compute::ConvolutionMethod m_ConvolutionMethod;
 
-    void FreeUnusedTensors();
-
     std::unique_ptr<ICLTensorProxy> m_InputProxy;
     std::unique_ptr<ICLTensorProxy> m_OutputProxy;
 };
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index d5716c8014..12d8c460f9 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -29,7 +29,7 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
                                                       bool isFastMathEnabled,
                                                       const ActivationDescriptor* activationDescriptor)
 {
-    // The implemented workload does support both const and non-const
+    // arm_compute::NEConvolutionLayer supports both const and non-const
     // weights. However, in the case of non-const weights we'd have to call
     // prepare or configure for each inference which we're not set up to do just yet.
     if (!weights.IsConstant())
@@ -40,7 +40,8 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
 
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
                                                                       descriptor.m_DilationY);
@@ -58,6 +59,7 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
                                            "ArmNN NeonConvolution2dWorkload does not support non constant bias."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
 
@@ -86,7 +88,8 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
 {
     using arm_compute::NEConvolutionLayer;
 
-    m_Data.ValidateInputsOutputs("NeonConvolution2dWorkload", 1, 1);
+    uint32_t numInputs = m_Data.m_Parameters.m_BiasEnabled ? 3 : 2;
+    m_Data.ValidateInputsOutputs("NeonConvolution2dWorkload", numInputs, 1);
 
     arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -97,7 +100,6 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
     m_KernelTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
-
     if (m_Data.m_Parameters.m_BiasEnabled)
     {
         m_BiasTensor = std::make_unique<arm_compute::Tensor>();
@@ -148,7 +150,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
     ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Construct",
                                          descriptor.m_Parameters,
                                          detailsInfo,
-                                         this->GetGuid());
+                                         GetGuid());
 
     m_ConvolutionLayer.reset(convolutionLayer.release());
 
@@ -162,7 +164,8 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
     }
 
     m_ConvolutionLayer->prepare();
-    FreeUnusedTensors();
+    FreeTensorIfUnused(m_KernelTensor);
+    FreeTensorIfUnused(m_BiasTensor);
 }
 
 void NeonConvolution2dWorkload::Execute() const
@@ -176,10 +179,4 @@ arm_compute::ConvolutionMethod NeonConvolution2dWorkload::GetConvolutionMethod()
     return m_ConvolutionMethod;
 }
 
-void NeonConvolution2dWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_KernelTensor);
-    FreeTensorIfUnused(m_BiasTensor);
-}
-
 } //namespace armnn
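Both backends apply the same early rejection of non-constant weights, so a caller can probe support before constructing a workload. A minimal sketch of such a check against the Neon validate function above, assuming the TensorInfo and descriptor variables are prepared elsewhere:

    // weightsInfo must report IsConstant() == true, or validation fails up front.
    arm_compute::Status status = NeonConvolution2dWorkloadValidate(inputInfo,
                                                                   outputInfo,
                                                                   descriptor,
                                                                   weightsInfo,
                                                                   armnn::Optional<armnn::TensorInfo>(biasInfo),
                                                                   false,    // isFastMathEnabled
                                                                   nullptr); // no fused activation
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        std::cerr << status.error_description() << std::endl;
    }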
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 93e5cb4691..e833f2ac66 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -45,9 +45,6 @@ private:
     std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
 
     arm_compute::ConvolutionMethod m_ConvolutionMethod;
-
-    void FreeUnusedTensors();
-
 };
 
 } //namespace armnn
-- 
cgit v1.2.1