From e448be3ac55897a3eabe85962891f8414f8e3cf9 Mon Sep 17 00:00:00 2001 From: kevmay01 Date: Wed, 26 Sep 2018 10:21:55 +0100 Subject: IVGCVSW-1927 Add Neon 8-bit FullyConnected support Change-Id: Idf4cc7a9a7d3261b9eceb653b999257506cdae76 --- src/backends/NeonLayerSupport.cpp | 2 +- src/backends/NeonWorkloadFactory.cpp | 4 +- src/backends/NeonWorkloads.hpp | 2 +- src/backends/NeonWorkloads/CMakeLists.txt | 4 +- .../NeonFullyConnectedFloatWorkload.cpp | 96 ------------------ .../NeonFullyConnectedFloatWorkload.hpp | 40 -------- .../NeonWorkloads/NeonFullyConnectedWorkload.cpp | 110 +++++++++++++++++++++ .../NeonWorkloads/NeonFullyConnectedWorkload.hpp | 40 ++++++++ src/backends/NeonWorkloads/backend.mk | 2 +- src/backends/test/ArmComputeNeon.cpp | 2 + src/backends/test/CreateWorkloadNeon.cpp | 4 +- 11 files changed, 161 insertions(+), 145 deletions(-) delete mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp delete mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp diff --git a/src/backends/NeonLayerSupport.cpp b/src/backends/NeonLayerSupport.cpp index 30956dfba0..f06db1747d 100644 --- a/src/backends/NeonLayerSupport.cpp +++ b/src/backends/NeonLayerSupport.cpp @@ -23,7 +23,7 @@ #include "NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp" #include "NeonWorkloads/NeonMultiplicationFloatWorkload.hpp" #include "NeonWorkloads/NeonNormalizationFloatWorkload.hpp" -#include "NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp" +#include "NeonWorkloads/NeonFullyConnectedWorkload.hpp" #include "NeonWorkloads/NeonPermuteWorkload.hpp" #include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp" #include "NeonWorkloads/NeonSoftmaxBaseWorkload.hpp" diff --git a/src/backends/NeonWorkloadFactory.cpp b/src/backends/NeonWorkloadFactory.cpp index c989121eac..3b994bf049 100644 --- a/src/backends/NeonWorkloadFactory.cpp +++ b/src/backends/NeonWorkloadFactory.cpp @@ -116,8 +116,8 @@ std::unique_ptr NeonWorkloadFactory::CreateMerger(const Merger std::unique_ptr NeonWorkloadFactory::CreateFullyConnected( const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); } std::unique_ptr NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, diff --git a/src/backends/NeonWorkloads.hpp b/src/backends/NeonWorkloads.hpp index 676c23cc4d..e471bbcf35 100644 --- a/src/backends/NeonWorkloads.hpp +++ b/src/backends/NeonWorkloads.hpp @@ -21,7 +21,7 @@ #include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp" #include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp" #include "backends/NeonWorkloads/NeonFloorFloatWorkload.hpp" -#include "backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp" #include "backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp" #include "backends/NeonWorkloads/NeonLstmFloatWorkload.hpp" #include "backends/NeonWorkloads/NeonMergerFloatWorkload.hpp" diff --git a/src/backends/NeonWorkloads/CMakeLists.txt b/src/backends/NeonWorkloads/CMakeLists.txt index 02cb53dff1..ca44bcc1e1 100644 --- a/src/backends/NeonWorkloads/CMakeLists.txt +++ b/src/backends/NeonWorkloads/CMakeLists.txt @@ -37,8 +37,8 @@ list(APPEND armnnNeonBackend_sources NeonDepthwiseConvolutionUint8Workload.hpp NeonFloorFloatWorkload.cpp NeonFloorFloatWorkload.hpp - NeonFullyConnectedFloatWorkload.cpp - NeonFullyConnectedFloatWorkload.hpp + NeonFullyConnectedWorkload.cpp + NeonFullyConnectedWorkload.hpp NeonL2NormalizationFloatWorkload.cpp NeonL2NormalizationFloatWorkload.hpp NeonLstmFloatWorkload.cpp diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp deleted file mode 100644 index 8fcc0956e2..0000000000 --- a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NeonFullyConnectedFloatWorkload.hpp" - -#include -#include -#include - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiases; - arm_compute::TensorInfo *optionalAclBiases = nullptr; - if (descriptor.m_BiasEnabled) - { - aclBiases = BuildArmComputeTensorInfo(biases); - optionalAclBiases = &aclBiases; - } - - const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = - ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); - - - return arm_compute::NEFullyConnectedLayer::validate(&aclInput, - &aclWeights, - optionalAclBiases, - &aclOutput, - fullyConnectedLayerInfo); -} - -NeonFullyConnectedFloatWorkload::NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) - : FloatWorkload(descriptor, info) - , m_FullyConnectedLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("NeonFullyConnectedFloatWorkload", 1, 1); - - arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); - - m_WeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasesTensor = std::make_unique(); - BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); - } - - // Construct - arm_compute::FullyConnectedLayerInfo fc_info; - fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; - m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); - - // Allocate - InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); - - if (m_BiasesTensor) - { - InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_FullyConnectedLayer.prepare(); - FreeUnusedTensors(); -} - -void NeonFullyConnectedFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedFloatWorkload_Execute"); - m_FullyConnectedLayer.run(); -} - -void NeonFullyConnectedFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_WeightsTensor); - FreeTensorIfUnused(m_BiasesTensor); -} - -} //namespace armnn - diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp deleted file mode 100644 index 27e5717b04..0000000000 --- a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include - -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include - -namespace armnn -{ - -arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor); - -class NeonFullyConnectedFloatWorkload : public FloatWorkload -{ -public: - NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager); - virtual void Execute() const override; - -private: - mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; - - std::unique_ptr m_WeightsTensor; - std::unique_ptr m_BiasesTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.cpp b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.cpp new file mode 100644 index 0000000000..8cebb4f48f --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.cpp @@ -0,0 +1,110 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonFullyConnectedWorkload.hpp" + +#include +#include +#include + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + + return arm_compute::NEFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : BaseWorkload(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_WeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + // Allocate + if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8) + { + InitialiseArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor()); + } + else + { + InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); + } + + if (m_BiasesTensor) + { + if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32) + { + InitialiseArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor()); + } + else + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); + } + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void NeonFullyConnectedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void NeonFullyConnectedWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp new file mode 100644 index 0000000000..11991f87b5 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class NeonFullyConnectedWorkload : public BaseWorkload +{ +public: + NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr m_WeightsTensor; + std::unique_ptr m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/backend.mk b/src/backends/NeonWorkloads/backend.mk index 06525c9df2..4c7fbe6fe0 100644 --- a/src/backends/NeonWorkloads/backend.mk +++ b/src/backends/NeonWorkloads/backend.mk @@ -19,7 +19,7 @@ BACKEND_SOURCES := \ NeonDepthwiseConvolutionFloatWorkload.cpp \ NeonDepthwiseConvolutionUint8Workload.cpp \ NeonFloorFloatWorkload.cpp \ - NeonFullyConnectedFloatWorkload.cpp \ + NeonFullyConnectedWorkload.cpp \ NeonL2NormalizationFloatWorkload.cpp \ NeonLstmFloatWorkload.cpp \ NeonMergerFloatWorkload.cpp \ diff --git a/src/backends/test/ArmComputeNeon.cpp b/src/backends/test/ArmComputeNeon.cpp index f1a2cf65bd..bc3b6b5def 100644 --- a/src/backends/test/ArmComputeNeon.cpp +++ b/src/backends/test/ArmComputeNeon.cpp @@ -316,6 +316,8 @@ ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, tr ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) +ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true) // Add ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) diff --git a/src/backends/test/CreateWorkloadNeon.cpp b/src/backends/test/CreateWorkloadNeon.cpp index fbe064e1c4..ce62a02537 100644 --- a/src/backends/test/CreateWorkloadNeon.cpp +++ b/src/backends/test/CreateWorkloadNeon.cpp @@ -225,13 +225,13 @@ static void NeonCreateFullyConnectedWorkloadTest() #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload) { - NeonCreateFullyConnectedWorkloadTest(); + NeonCreateFullyConnectedWorkloadTest(); } #endif BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload) { - NeonCreateFullyConnectedWorkloadTest(); + NeonCreateFullyConnectedWorkloadTest(); } template -- cgit v1.2.1