diff options
-rw-r--r-- | src/backends/neon/NeonLayerSupport.cpp | 2 | ||||
-rw-r--r-- | src/backends/neon/NeonWorkloadFactory.cpp | 3 | ||||
-rw-r--r-- | src/backends/neon/backend.mk | 4 | ||||
-rw-r--r-- | src/backends/neon/test/NeonCreateWorkloadTests.cpp | 10 | ||||
-rw-r--r-- | src/backends/neon/workloads/CMakeLists.txt | 8 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp | 49 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp | 97 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp | 33 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp | 29 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp (renamed from src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp) | 54 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp (renamed from src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp) | 17 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonWorkloads.hpp | 3 |
12 files changed, 73 insertions, 236 deletions
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index e28c4f5add..970ee5cc90 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -18,7 +18,7 @@ #include "workloads/NeonActivationWorkload.hpp" #include "workloads/NeonBatchNormalizationFloatWorkload.hpp" #include "workloads/NeonConvolution2dWorkload.hpp" -#include "workloads/NeonDepthwiseConvolutionBaseWorkload.hpp" +#include "workloads/NeonDepthwiseConvolutionWorkload.hpp" #include "workloads/NeonL2NormalizationFloatWorkload.hpp" #include "workloads/NeonMultiplicationFloatWorkload.hpp" #include "workloads/NeonNormalizationFloatWorkload.hpp" diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index f73f3aac1a..b3e1dd9563 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -142,8 +142,7 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d( std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d( const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<NeonDepthwiseConvolutionFloatWorkload, NeonDepthwiseConvolutionUint8Workload>( - descriptor, info); + return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info); } std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization( diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 97105ec17e..52e241f520 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -20,9 +20,7 @@ BACKEND_SOURCES := \ workloads/NeonConvertFp16ToFp32Workload.cpp \ workloads/NeonConvertFp32ToFp16Workload.cpp \ workloads/NeonConvolution2dWorkload.cpp \ - workloads/NeonDepthwiseConvolutionBaseWorkload.cpp \ - workloads/NeonDepthwiseConvolutionFloatWorkload.cpp \ - workloads/NeonDepthwiseConvolutionUint8Workload.cpp \ + workloads/NeonDepthwiseConvolutionWorkload.cpp \ workloads/NeonFloorFloatWorkload.cpp \ workloads/NeonFullyConnectedWorkload.cpp \ workloads/NeonL2NormalizationFloatWorkload.cpp \ diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp index 63b3d41500..4b6ab51924 100644 --- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp +++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp @@ -220,13 +220,13 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload) NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC); } -template <typename DepthwiseConvolution2dFloat32WorkloadType, typename armnn::DataType DataType> +template <typename armnn::DataType DataType> static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout) { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolution2dFloat32WorkloadType, + auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload, DataType>(factory, graph, dataLayout); // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). @@ -247,15 +247,13 @@ static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout) BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload) { - NeonCreateDepthWiseConvolutionWorkloadTest<NeonDepthwiseConvolutionFloatWorkload, - DataType::Float32>(DataLayout::NHWC); + NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC); } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload) { - NeonCreateDepthWiseConvolutionWorkloadTest<NeonDepthwiseConvolutionFloatWorkload, - DataType::Float16>(DataLayout::NHWC); + NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC); } #endif diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt index 9c57dffd23..cf47d66656 100644 --- a/src/backends/neon/workloads/CMakeLists.txt +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -18,12 +18,8 @@ list(APPEND armnnNeonBackendWorkloads_sources NeonConvertFp32ToFp16Workload.hpp NeonConvolution2dWorkload.cpp NeonConvolution2dWorkload.hpp - NeonDepthwiseConvolutionBaseWorkload.cpp - NeonDepthwiseConvolutionBaseWorkload.hpp - NeonDepthwiseConvolutionFloatWorkload.cpp - NeonDepthwiseConvolutionFloatWorkload.hpp - NeonDepthwiseConvolutionUint8Workload.cpp - NeonDepthwiseConvolutionUint8Workload.hpp + NeonDepthwiseConvolutionWorkload.cpp + NeonDepthwiseConvolutionWorkload.hpp NeonFloorFloatWorkload.cpp NeonFloorFloatWorkload.hpp NeonFullyConnectedWorkload.cpp diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp deleted file mode 100644 index aa535adec9..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionBaseWorkload.hpp" - -#include <backends/aclCommon/ArmComputeTensorUtils.hpp> - -namespace armnn -{ - -arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional<TensorInfo>& biases) -{ - const arm_compute::TensorInfo aclInputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.has_value()); - - aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - const arm_compute::PadStrideInfo aclPadStrideInfo = - armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - aclPadStrideInfo, - aclDepthMultiplier); -} - -}
\ No newline at end of file diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp deleted file mode 100644 index 9790998ebe..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionFloatWorkload.hpp" -#include <backends/neon/NeonLayerSupport.hpp> -#include <backends/CpuTensorHandle.hpp> -#include <backends/aclCommon/ArmComputeTensorUtils.hpp> - -namespace armnn -{ -using namespace armcomputetensorutils; - -NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) -{ - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique<arm_compute::Tensor>(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloatWorkload", 1, 1); - - arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; - if (use3x3Optimisation) - { - m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); - static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - else - { - m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); - static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - m_pDepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void NeonDepthwiseConvolutionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloatWorkload_Execute"); - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - m_pDepthwiseConvolutionLayer->run(); -} - -void NeonDepthwiseConvolutionFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp deleted file mode 100644 index 0109ea10cb..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/neon/workloads/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonDepthwiseConvolutionFloatWorkload : public FloatWorkload<DepthwiseConvolution2dQueueDescriptor> -{ -public: - NeonDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; - - std::unique_ptr<arm_compute::Tensor> m_KernelTensor; - std::unique_ptr<arm_compute::Tensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - - - diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp deleted file mode 100644 index 90cf8b0091..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backends/neon/workloads/NeonWorkloadUtils.hpp> - -namespace armnn -{ - -class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor> -{ -public: - NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; - - std::unique_ptr<arm_compute::Tensor> m_KernelTensor; - std::unique_ptr<arm_compute::Tensor> m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index 25d00f92ca..8b1feaa93c 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -3,19 +3,57 @@ // SPDX-License-Identifier: MIT // -#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "NeonDepthwiseConvolutionWorkload.hpp" + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> #include <backends/neon/NeonLayerSupport.hpp> #include <backends/CpuTensorHandle.hpp> -#include <backends/aclCommon/ArmComputeTensorUtils.hpp> namespace armnn { + using namespace armcomputetensorutils; -NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional<TensorInfo>& biases) +{ + const arm_compute::TensorInfo aclInputInfo = + BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = + BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = + BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.has_value()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = + BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) + : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) { const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); @@ -36,7 +74,7 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( m_Data.m_Parameters.m_PadBottom, arm_compute::DimensionRoundingType::FLOOR); - m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1); arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); @@ -80,15 +118,15 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( FreeUnusedTensors(); } -void NeonDepthwiseConvolutionUint8Workload::Execute() const +void NeonDepthwiseConvolutionWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute"); BOOST_ASSERT(m_pDepthwiseConvolutionLayer); m_pDepthwiseConvolutionLayer->run(); } -void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() +void NeonDepthwiseConvolutionWorkload::FreeUnusedTensors() { FreeTensorIfUnused(m_KernelTensor); FreeTensorIfUnused(m_BiasTensor); diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp index ffee50861a..fb93fe8ea0 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp @@ -16,4 +16,21 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i const TensorInfo& weights, const Optional<TensorInfo>& biases); +class NeonDepthwiseConvolutionWorkload : public BaseWorkload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + NeonDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + std::unique_ptr<arm_compute::Tensor> m_KernelTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + } // namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp index 688b1f5828..e55cf0d332 100644 --- a/src/backends/neon/workloads/NeonWorkloads.hpp +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -11,8 +11,7 @@ #include "NeonConvertFp16ToFp32Workload.hpp" #include "NeonConvertFp32ToFp16Workload.hpp" #include "NeonConvolution2dWorkload.hpp" -#include "NeonDepthwiseConvolutionFloatWorkload.hpp" -#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "NeonDepthwiseConvolutionWorkload.hpp" #include "NeonFloorFloatWorkload.hpp" #include "NeonFullyConnectedWorkload.hpp" #include "NeonL2NormalizationFloatWorkload.hpp" |