author     kevmay01 <kevin.may@arm.com>                 2018-09-26 10:21:55 +0100
committer  Matthew Bentham <matthew.bentham@arm.com>    2018-10-10 16:16:57 +0100
commit     e448be3ac55897a3eabe85962891f8414f8e3cf9 (patch)
tree       11e46d4979eb6d8d70c7f43d5cf690fc6f78d605 /src/backends
parent     9fc824a596d6bddd27e5aa1438b115e71a117aa3 (diff)
IVGCVSW-1927 Add Neon 8-bit FullyConnected support
Change-Id: Idf4cc7a9a7d3261b9eceb653b999257506cdae76
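In essence, the patch renames NeonFullyConnectedFloatWorkload to NeonFullyConnectedWorkload, rebases it from FloatWorkload onto the data-type-agnostic BaseWorkload, and selects the tensor-initialisation routine from the tensor's DataType instead of assuming float. Distilled from the constructor change in the diff below (an illustrative excerpt, not the verbatim source):

    // Weights: quantised tensors are copied as raw uint8; everything else
    // goes through the existing float path.
    if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8)
    {
        InitialiseArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
    }
    else
    {
        InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight);
    }
    // Biases follow the same pattern, using Signed32 data for quantised networks.

The factory change is the other half: MakeWorkload<NeonFullyConnectedWorkload, NeonFullyConnectedWorkload> registers the same class for both the float and uint8 template slots, where NullWorkload previously rejected uint8 inputs.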
Diffstat (limited to 'src/backends')
-rw-r--r--  src/backends/NeonLayerSupport.cpp                          |  2
-rw-r--r--  src/backends/NeonWorkloadFactory.cpp                       |  4
-rw-r--r--  src/backends/NeonWorkloads.hpp                             |  2
-rw-r--r--  src/backends/NeonWorkloads/CMakeLists.txt                  |  4
-rw-r--r--  src/backends/NeonWorkloads/NeonFullyConnectedWorkload.cpp (renamed from src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp) | 32
-rw-r--r--  src/backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp (renamed from src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp) |  6
-rw-r--r--  src/backends/NeonWorkloads/backend.mk                      |  2
-rw-r--r--  src/backends/test/ArmComputeNeon.cpp                       |  2
-rw-r--r--  src/backends/test/CreateWorkloadNeon.cpp                   |  4
9 files changed, 37 insertions, 21 deletions
diff --git a/src/backends/NeonLayerSupport.cpp b/src/backends/NeonLayerSupport.cpp
index 30956dfba0..f06db1747d 100644
--- a/src/backends/NeonLayerSupport.cpp
+++ b/src/backends/NeonLayerSupport.cpp
@@ -23,7 +23,7 @@
 #include "NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp"
 #include "NeonWorkloads/NeonMultiplicationFloatWorkload.hpp"
 #include "NeonWorkloads/NeonNormalizationFloatWorkload.hpp"
-#include "NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp"
+#include "NeonWorkloads/NeonFullyConnectedWorkload.hpp"
 #include "NeonWorkloads/NeonPermuteWorkload.hpp"
 #include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp"
 #include "NeonWorkloads/NeonSoftmaxBaseWorkload.hpp"
diff --git a/src/backends/NeonWorkloadFactory.cpp b/src/backends/NeonWorkloadFactory.cpp
index c989121eac..3b994bf049 100644
--- a/src/backends/NeonWorkloadFactory.cpp
+++ b/src/backends/NeonWorkloadFactory.cpp
@@ -116,8 +116,8 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const Merger
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return MakeWorkload<NeonFullyConnectedFloatWorkload, NullWorkload>(descriptor, info,
-                                                                       m_MemoryManager.GetIntraLayerManager());
+    return MakeWorkload<NeonFullyConnectedWorkload, NeonFullyConnectedWorkload>(descriptor, info,
+                                                                                m_MemoryManager.GetIntraLayerManager());
 }
 
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
diff --git a/src/backends/NeonWorkloads.hpp b/src/backends/NeonWorkloads.hpp
index 676c23cc4d..e471bbcf35 100644
--- a/src/backends/NeonWorkloads.hpp
+++ b/src/backends/NeonWorkloads.hpp
@@ -21,7 +21,7 @@
 #include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp"
 #include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp"
 #include "backends/NeonWorkloads/NeonFloorFloatWorkload.hpp"
-#include "backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp"
+#include "backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp"
 #include "backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp"
 #include "backends/NeonWorkloads/NeonLstmFloatWorkload.hpp"
 #include "backends/NeonWorkloads/NeonMergerFloatWorkload.hpp"
diff --git a/src/backends/NeonWorkloads/CMakeLists.txt b/src/backends/NeonWorkloads/CMakeLists.txt
index 02cb53dff1..ca44bcc1e1 100644
--- a/src/backends/NeonWorkloads/CMakeLists.txt
+++ b/src/backends/NeonWorkloads/CMakeLists.txt
@@ -37,8 +37,8 @@ list(APPEND armnnNeonBackend_sources
     NeonDepthwiseConvolutionUint8Workload.hpp
     NeonFloorFloatWorkload.cpp
     NeonFloorFloatWorkload.hpp
-    NeonFullyConnectedFloatWorkload.cpp
-    NeonFullyConnectedFloatWorkload.hpp
+    NeonFullyConnectedWorkload.cpp
+    NeonFullyConnectedWorkload.hpp
     NeonL2NormalizationFloatWorkload.cpp
     NeonL2NormalizationFloatWorkload.hpp
     NeonLstmFloatWorkload.cpp
diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.cpp
index 8fcc0956e2..8cebb4f48f 100644
--- a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp
+++ b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.cpp
@@ -3,7 +3,7 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include "NeonFullyConnectedFloatWorkload.hpp"
+#include "NeonFullyConnectedWorkload.hpp"
 
 #include <backends/aclCommon/ArmComputeTensorUtils.hpp>
 #include <backends/aclCommon/ArmComputeUtils.hpp>
@@ -42,12 +42,12 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                        fullyConnectedLayerInfo);
 }
 
-NeonFullyConnectedFloatWorkload::NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor,
+NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
                                                        const WorkloadInfo& info,
                                                        std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
-    : FloatWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
+    : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
     , m_FullyConnectedLayer(memoryManager)
 {
-    m_Data.ValidateInputsOutputs("NeonFullyConnectedFloatWorkload", 1, 1);
+    m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
 
     arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -67,11 +67,25 @@ NeonFullyConnectedFloatWorkload::NeonFullyConnectedFloatWorkload(const FullyConn
     m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
 
     // Allocate
-    InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight);
+    if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8)
+    {
+        InitialiseArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
+    }
+    else
+    {
+        InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight);
+    }
 
     if (m_BiasesTensor)
     {
-        InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias);
+        if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32)
+        {
+            InitialiseArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor<int32_t>());
+        }
+        else
+        {
+            InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias);
+        }
     }
 
     // Force Compute Library to perform the necessary copying and reshaping, after which
@@ -80,13 +94,13 @@ NeonFullyConnectedFloatWorkload::NeonFullyConnectedFloatWorkload(const FullyConn
     FreeUnusedTensors();
 }
 
-void NeonFullyConnectedFloatWorkload::Execute() const
+void NeonFullyConnectedWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute");
     m_FullyConnectedLayer.run();
 }
 
-void NeonFullyConnectedFloatWorkload::FreeUnusedTensors()
+void NeonFullyConnectedWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_WeightsTensor);
     FreeTensorIfUnused(m_BiasesTensor);
diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp
index 27e5717b04..11991f87b5 100644
--- a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp
+++ b/src/backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp
@@ -20,11 +20,11 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                        const TensorInfo& biases,
                                                        const FullyConnectedDescriptor& descriptor);
 
-class NeonFullyConnectedFloatWorkload : public FloatWorkload<FullyConnectedQueueDescriptor>
+class NeonFullyConnectedWorkload : public BaseWorkload<FullyConnectedQueueDescriptor>
 {
 public:
-    NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info,
-                                    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info,
+                               std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
 
     virtual void Execute() const override;
 
 private:
diff --git a/src/backends/NeonWorkloads/backend.mk b/src/backends/NeonWorkloads/backend.mk
index 06525c9df2..4c7fbe6fe0 100644
--- a/src/backends/NeonWorkloads/backend.mk
+++ b/src/backends/NeonWorkloads/backend.mk
@@ -19,7 +19,7 @@ BACKEND_SOURCES := \
         NeonDepthwiseConvolutionFloatWorkload.cpp \
         NeonDepthwiseConvolutionUint8Workload.cpp \
         NeonFloorFloatWorkload.cpp \
-        NeonFullyConnectedFloatWorkload.cpp \
+        NeonFullyConnectedWorkload.cpp \
         NeonL2NormalizationFloatWorkload.cpp \
         NeonLstmFloatWorkload.cpp \
         NeonMergerFloatWorkload.cpp \
diff --git a/src/backends/test/ArmComputeNeon.cpp b/src/backends/test/ArmComputeNeon.cpp
index f1a2cf65bd..bc3b6b5def 100644
--- a/src/backends/test/ArmComputeNeon.cpp
+++ b/src/backends/test/ArmComputeNeon.cpp
@@ -316,6 +316,8 @@ ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, tr
 ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true)
 ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false)
 ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true)
+ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false)
+ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true)
 
 // Add
 ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest)
diff --git a/src/backends/test/CreateWorkloadNeon.cpp b/src/backends/test/CreateWorkloadNeon.cpp
index fbe064e1c4..ce62a02537 100644
--- a/src/backends/test/CreateWorkloadNeon.cpp
+++ b/src/backends/test/CreateWorkloadNeon.cpp
@@ -225,13 +225,13 @@ static void NeonCreateFullyConnectedWorkloadTest()
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
 {
-    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedFloatWorkload, DataType::Float16>();
+    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
 }
 #endif
 
 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
 {
-    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedFloatWorkload, DataType::Float32>();
+    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
 }
 
 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
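For completeness, a caller-side sketch of how the new 8-bit support can be queried through the NeonFullyConnectedWorkloadValidate helper shown in the diff above. This is not part of the patch; the include path mirrors the tree layout above, and the shapes and quantisation parameters are invented for the example.

    #include <backends/NeonWorkloads/NeonFullyConnectedWorkload.hpp>

    bool IsUint8FullyConnectedSupported()
    {
        using namespace armnn;

        // Quantised 8-bit activations and weights; biases are Signed32, with
        // scale chosen as inputScale * weightScale, as Compute Library expects.
        TensorInfo input  (TensorShape({1, 64}),  DataType::QuantisedAsymm8, 0.5f, 0);
        TensorInfo weights(TensorShape({10, 64}), DataType::QuantisedAsymm8, 0.5f, 0);
        TensorInfo biases (TensorShape({1, 10}),  DataType::Signed32,        0.25f, 0);
        TensorInfo output (TensorShape({1, 10}),  DataType::QuantisedAsymm8, 0.5f, 0);

        FullyConnectedDescriptor descriptor;
        descriptor.m_BiasEnabled = true;

        // An OK status means the Neon backend accepts this 8-bit layer.
        arm_compute::Status status =
            NeonFullyConnectedWorkloadValidate(input, output, weights, biases, descriptor);
        return status.error_code() == arm_compute::ErrorCode::OK;
    }

The new FullyConnectedUint8 and FullyConnectedBiasedUint8 test cases registered in ArmComputeNeon.cpp exercise this path end to end.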