From 7714088e14d1fcd63153a80e7439d59a1824a3dc Mon Sep 17 00:00:00 2001 From: Nattapat Chaimanowong Date: Wed, 17 Oct 2018 11:12:19 +0100 Subject: IVGCVSW-1951 Remove type templating from NeonDepthwiseConvolutionWorkload Change-Id: I411d02949524eb802672d83ee281300c34b007c8 --- src/backends/neon/NeonLayerSupport.cpp | 2 +- src/backends/neon/NeonWorkloadFactory.cpp | 3 +- src/backends/neon/backend.mk | 4 +- src/backends/neon/test/NeonCreateWorkloadTests.cpp | 10 +- src/backends/neon/workloads/CMakeLists.txt | 8 +- .../NeonDepthwiseConvolutionBaseWorkload.cpp | 49 -------- .../NeonDepthwiseConvolutionBaseWorkload.hpp | 19 --- .../NeonDepthwiseConvolutionFloatWorkload.cpp | 97 --------------- .../NeonDepthwiseConvolutionFloatWorkload.hpp | 33 ----- .../NeonDepthwiseConvolutionUint8Workload.cpp | 97 --------------- .../NeonDepthwiseConvolutionUint8Workload.hpp | 29 ----- .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 135 +++++++++++++++++++++ .../workloads/NeonDepthwiseConvolutionWorkload.hpp | 36 ++++++ src/backends/neon/workloads/NeonWorkloads.hpp | 3 +- 14 files changed, 181 insertions(+), 344 deletions(-) delete mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp delete mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp delete mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp delete mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp delete mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp delete mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp create mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp create mode 100644 src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index e28c4f5add..970ee5cc90 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -18,7 +18,7 @@ #include "workloads/NeonActivationWorkload.hpp" #include "workloads/NeonBatchNormalizationFloatWorkload.hpp" #include "workloads/NeonConvolution2dWorkload.hpp" -#include "workloads/NeonDepthwiseConvolutionBaseWorkload.hpp" +#include "workloads/NeonDepthwiseConvolutionWorkload.hpp" #include "workloads/NeonL2NormalizationFloatWorkload.hpp" #include "workloads/NeonMultiplicationFloatWorkload.hpp" #include "workloads/NeonNormalizationFloatWorkload.hpp" diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index f73f3aac1a..b3e1dd9563 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -142,8 +142,7 @@ std::unique_ptr NeonWorkloadFactory::CreateConvolution2d( std::unique_ptr NeonWorkloadFactory::CreateDepthwiseConvolution2d( const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload( - descriptor, info); + return std::make_unique(descriptor, info); } std::unique_ptr NeonWorkloadFactory::CreateNormalization( diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 97105ec17e..52e241f520 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -20,9 +20,7 @@ BACKEND_SOURCES := \ workloads/NeonConvertFp16ToFp32Workload.cpp \ workloads/NeonConvertFp32ToFp16Workload.cpp \ workloads/NeonConvolution2dWorkload.cpp \ - workloads/NeonDepthwiseConvolutionBaseWorkload.cpp \ - workloads/NeonDepthwiseConvolutionFloatWorkload.cpp \ - workloads/NeonDepthwiseConvolutionUint8Workload.cpp \ + workloads/NeonDepthwiseConvolutionWorkload.cpp \ workloads/NeonFloorFloatWorkload.cpp \ workloads/NeonFullyConnectedWorkload.cpp \ workloads/NeonL2NormalizationFloatWorkload.cpp \ diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp index 63b3d41500..4b6ab51924 100644 --- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp +++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp @@ -220,13 +220,13 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload) NeonCreateConvolution2dWorkloadTest(DataLayout::NHWC); } -template +template static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout) { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateDepthwiseConvolution2dWorkloadTest(factory, graph, dataLayout); // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). @@ -247,15 +247,13 @@ static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout) BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload) { - NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout::NHWC); + NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout::NHWC); } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload) { - NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout::NHWC); + NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout::NHWC); } #endif diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt index 9c57dffd23..cf47d66656 100644 --- a/src/backends/neon/workloads/CMakeLists.txt +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -18,12 +18,8 @@ list(APPEND armnnNeonBackendWorkloads_sources NeonConvertFp32ToFp16Workload.hpp NeonConvolution2dWorkload.cpp NeonConvolution2dWorkload.hpp - NeonDepthwiseConvolutionBaseWorkload.cpp - NeonDepthwiseConvolutionBaseWorkload.hpp - NeonDepthwiseConvolutionFloatWorkload.cpp - NeonDepthwiseConvolutionFloatWorkload.hpp - NeonDepthwiseConvolutionUint8Workload.cpp - NeonDepthwiseConvolutionUint8Workload.hpp + NeonDepthwiseConvolutionWorkload.cpp + NeonDepthwiseConvolutionWorkload.hpp NeonFloorFloatWorkload.cpp NeonFloorFloatWorkload.hpp NeonFullyConnectedWorkload.cpp diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp deleted file mode 100644 index aa535adec9..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionBaseWorkload.hpp" - -#include - -namespace armnn -{ - -arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases) -{ - const arm_compute::TensorInfo aclInputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = - armcomputetensorutils::BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.has_value()); - - aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - const arm_compute::PadStrideInfo aclPadStrideInfo = - armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - aclPadStrideInfo, - aclDepthMultiplier); -} - -} \ No newline at end of file diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp deleted file mode 100644 index ffee50861a..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include - -namespace armnn -{ - -arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const Optional& biases); - -} // namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp deleted file mode 100644 index 9790998ebe..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionFloatWorkload.hpp" -#include -#include -#include - -namespace armnn -{ -using namespace armcomputetensorutils; - -NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloatWorkload", 1, 1); - - arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; - if (use3x3Optimisation) - { - m_pDepthwiseConvolutionLayer = std::make_unique(); - static_cast( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - else - { - m_pDepthwiseConvolutionLayer = std::make_unique(); - static_cast( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - m_pDepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void NeonDepthwiseConvolutionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloatWorkload_Execute"); - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - m_pDepthwiseConvolutionLayer->run(); -} - -void NeonDepthwiseConvolutionFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp deleted file mode 100644 index 0109ea10cb..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include - -namespace armnn -{ - -class NeonDepthwiseConvolutionFloatWorkload : public FloatWorkload -{ -public: - NeonDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable std::unique_ptr m_pDepthwiseConvolutionLayer; - - std::unique_ptr m_KernelTensor; - std::unique_ptr m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - - - diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp deleted file mode 100644 index 25d00f92ca..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonDepthwiseConvolutionUint8Workload.hpp" -#include -#include -#include - -namespace armnn -{ -using namespace armcomputetensorutils; - -NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : Uint8Workload(descriptor, info) -{ - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); - - arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); - input.info()->set_data_layout(aclDataLayout); - output.info()->set_data_layout(aclDataLayout); - - bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; - if (use3x3Optimisation) - { - m_pDepthwiseConvolutionLayer = std::make_unique(); - static_cast( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - else - { - m_pDepthwiseConvolutionLayer = std::make_unique(); - static_cast( - m_pDepthwiseConvolutionLayer.get())->configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - } - - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - m_pDepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void NeonDepthwiseConvolutionUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); - BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - - m_pDepthwiseConvolutionLayer->run(); -} - -void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp deleted file mode 100644 index 90cf8b0091..0000000000 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include - -namespace armnn -{ - -class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload -{ -public: - NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable std::unique_ptr m_pDepthwiseConvolutionLayer; - - std::unique_ptr m_KernelTensor; - std::unique_ptr m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp new file mode 100644 index 0000000000..8b1feaa93c --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -0,0 +1,135 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionWorkload.hpp" + +#include +#include +#include + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = + BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = + BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = + BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.has_value()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = + BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : BaseWorkload(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); + input.info()->set_data_layout(aclDataLayout); + output.info()->set_data_layout(aclDataLayout); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitializeArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp new file mode 100644 index 0000000000..fb93fe8ea0 --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases); + +class NeonDepthwiseConvolutionWorkload : public BaseWorkload +{ +public: + NeonDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + mutable std::unique_ptr m_pDepthwiseConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp index 688b1f5828..e55cf0d332 100644 --- a/src/backends/neon/workloads/NeonWorkloads.hpp +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -11,8 +11,7 @@ #include "NeonConvertFp16ToFp32Workload.hpp" #include "NeonConvertFp32ToFp16Workload.hpp" #include "NeonConvolution2dWorkload.hpp" -#include "NeonDepthwiseConvolutionFloatWorkload.hpp" -#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "NeonDepthwiseConvolutionWorkload.hpp" #include "NeonFloorFloatWorkload.hpp" #include "NeonFullyConnectedWorkload.hpp" #include "NeonL2NormalizationFloatWorkload.hpp" -- cgit v1.2.1