diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backends/backendsCommon/WorkloadData.cpp | 120 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.cpp | 118 | ||||
-rw-r--r-- | src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.hpp | 16 | ||||
-rw-r--r-- | src/backends/neon/NeonLayerSupport.cpp | 13 | ||||
-rw-r--r-- | src/backends/neon/NeonLayerSupport.hpp | 5 | ||||
-rw-r--r-- | src/backends/neon/NeonTensorHandle.hpp | 16 | ||||
-rw-r--r-- | src/backends/neon/NeonWorkloadFactory.cpp | 6 | ||||
-rw-r--r-- | src/backends/neon/NeonWorkloadFactory.hpp | 3 | ||||
-rw-r--r-- | src/backends/neon/backend.mk | 1 | ||||
-rw-r--r-- | src/backends/neon/test/NeonLayerTests.cpp | 8 | ||||
-rw-r--r-- | src/backends/neon/workloads/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp | 79 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonArgMinMaxWorkload.hpp | 29 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonWorkloads.hpp | 1 | ||||
-rw-r--r-- | src/backends/reference/test/RefLayerTests.cpp | 26 |
15 files changed, 340 insertions, 103 deletions
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index 3fbdec7bf9..e49fd09be0 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -450,10 +450,10 @@ void ActivationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const std::vector<DataType> supportedTypes = { - DataType::Float16, - DataType::Float32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float16, + DataType::Float32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); @@ -476,15 +476,15 @@ void ArgMinMaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const throw InvalidArgumentException(descriptorName + ": Output of ArgMinMax layer must be Int32."); } - std::vector<DataType> supportedTypes = - { - DataType::Float16, - DataType::Float32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 - }; + std::vector<DataType> supportedInputTypes = + { + DataType::Float16, + DataType::Float32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 + }; - ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); + ValidateDataTypes(inputTensorInfo, supportedInputTypes, descriptorName); } void SoftmaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const @@ -499,10 +499,10 @@ void SoftmaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const std::vector<DataType> supportedTypes = { - DataType::Float16, - DataType::Float32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float16, + DataType::Float32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); @@ -519,12 +519,12 @@ void SplitterQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const // Check the supported data types std::vector<DataType> supportedTypes = { - DataType::Float32, - DataType::Float16, - DataType::Boolean, - DataType::Signed32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float32, + DataType::Float16, + DataType::Boolean, + DataType::Signed32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0]; @@ -655,12 +655,12 @@ void ConcatQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const // Check the supported data types std::vector<DataType> supportedTypes = { - DataType::Float32, - DataType::Float16, - DataType::Boolean, - DataType::Signed32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float32, + DataType::Float16, + DataType::Boolean, + DataType::Signed32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0]; @@ -742,12 +742,12 @@ void StackQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const // Check the supported data types std::vector<DataType> supportedTypes = { - DataType::Float32, - DataType::Float16, - DataType::Boolean, - DataType::Signed32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float32, + DataType::Float16, + DataType::Boolean, + DataType::Signed32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; ValidateDataTypes(workloadInfo.m_InputTensorInfos[0], supportedTypes, descriptorName); @@ -805,10 +805,10 @@ void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c // Check the supported data types std::vector<DataType> supportedTypes = { - DataType::Float32, - DataType::Float16, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float32, + DataType::Float16, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); @@ -1937,8 +1937,8 @@ void QuantizeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const std::vector<DataType> supportedTypes = { - DataType::Float32, - DataType::Float16 + DataType::Float32, + DataType::Float16 }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); @@ -1962,10 +1962,10 @@ void BatchToSpaceNdQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c std::vector<DataType> supportedTypes = { - DataType::Float32, - DataType::Float16, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float32, + DataType::Float16, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); @@ -2129,10 +2129,10 @@ void RsqrtQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const std::vector<DataType> supportedTypes = { - DataType::Float16, - DataType::Float32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float16, + DataType::Float32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); @@ -2157,10 +2157,10 @@ void GatherQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const std::vector<DataType> supportedTypes = { - DataType::Float16, - DataType::Float32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 + DataType::Float16, + DataType::Float32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); @@ -2255,8 +2255,8 @@ void DequantizeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const std::vector<DataType> supportedTypes = { - DataType::Float32, - DataType::Float16 + DataType::Float32, + DataType::Float16 }; ValidateDataTypes(outputTensorInfo, supportedTypes, descriptorName); @@ -2578,12 +2578,12 @@ void AbsQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const ValidateTensorShapesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); std::vector<DataType> supportedTypes = - { - DataType::Float16, - DataType::Float32, - DataType::QuantisedAsymm8, - DataType::QuantisedSymm16 - }; + { + DataType::Float16, + DataType::Float32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 + }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); diff --git a/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.cpp index 9c07599387..e023d60bf0 100644 --- a/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.cpp @@ -30,7 +30,6 @@ LayerTestResult<int32_t, 3> ArgMinMaxTestCommon( auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo)); LayerTestResult<int32_t, 3> result(outputTensorInfo); - result.outputExpected = MakeTensor<int32_t, 3>(outputTensorInfo, outputData); std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); @@ -57,7 +56,6 @@ LayerTestResult<int32_t, 3> ArgMinMaxTestCommon( CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get()); return result; - } } // namespace @@ -86,7 +84,7 @@ LayerTestResult<int32_t, 3> ArgMaxSimpleTest( return ArgMinMaxTestCommon<ArmnnType>(workloadFactory, memoryManager, armnn::ArgMinMaxFunction::Max, inputTensorInfo, outputTensorInfo, - inputValues, outputValues, 3); + inputValues, outputValues, -1); // axis -1 === 3 } template<armnn::DataType ArmnnType, typename T> @@ -117,12 +115,12 @@ LayerTestResult<int32_t, 3> ArgMinSimpleTest( } template<armnn::DataType ArmnnType, typename T> -LayerTestResult<int32_t, 3> ArgMinChannel4dTest( +LayerTestResult<int32_t, 3> ArgMinChannelTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) { const armnn::TensorShape inputShape{ 1, 3, 2, 4}; - const armnn::TensorShape outputShape{ 1, 2, 4 }; // C=1,2,4 H =1,3,4 W=1,3,2 + const armnn::TensorShape outputShape{ 1, 2, 4 }; armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); @@ -134,15 +132,14 @@ LayerTestResult<int32_t, 3> ArgMinChannel4dTest( armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Signed32); - std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, - 5.0f, 6.0f, 7.0f, 8.0f, + std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, - 10.0f, 20.0f, 30.0f, 40.0f, - 50.0f, 60.0f, 70.0f, 80.0f, + 10.0f, 20.0f, 30.0f, 40.0f, + 50.0f, 60.0f, 70.0f, 80.0f, 100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f }); - std::vector<int32_t> outputValues({ 0, 0, 0, 0, 0, 0, 0, 0 }); @@ -153,7 +150,7 @@ LayerTestResult<int32_t, 3> ArgMinChannel4dTest( } template<armnn::DataType ArmnnType, typename T> -LayerTestResult<int32_t, 3> ArgMaxChannel4dTest( +LayerTestResult<int32_t, 3> ArgMaxChannelTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) { @@ -170,15 +167,14 @@ LayerTestResult<int32_t, 3> ArgMaxChannel4dTest( armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Signed32); - std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, - 5.0f, 6.0f, 7.0f, 8.0f, + std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, - 100.0f, 200.0f, 300.0f, 400.0f, - 500.0f, 600.0f, 700.0f, 800.0f }); - + 100.0f, 200.0f, 300.0f, 400.0f, + 500.0f, 600.0f, 700.0f, 800.0f }); std::vector<int32_t> outputValues({ 2, 2, 2, 2, 2, 2, 2, 2 }); @@ -188,6 +184,64 @@ LayerTestResult<int32_t, 3> ArgMaxChannel4dTest( inputValues, outputValues, 1); } +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<int32_t, 3> ArgMaxHeightTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + const armnn::TensorShape inputShape{ 1, 3, 2, 4}; + const armnn::TensorShape outputShape{ 3, 1, 4 }; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Signed32); + + std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + + 10.0f, 20.0f, 30.0f, 40.0f, + 50.0f, 60.0f, 70.0f, 80.0f, + + 100.0f, 200.0f, 300.0f, 400.0f, + 500.0f, 600.0f, 700.0f, 800.0f }); + std::vector<int32_t> outputValues({ 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1 }); + + return ArgMinMaxTestCommon<ArmnnType>(workloadFactory, memoryManager, + armnn::ArgMinMaxFunction::Max, + inputTensorInfo, outputTensorInfo, + inputValues, outputValues, 2); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<int32_t, 3> ArgMinWidthTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + const armnn::TensorShape inputShape{ 1, 3, 2, 4}; + const armnn::TensorShape outputShape{ 3, 2, 1 }; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Signed32); + + std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + + 10.0f, 20.0f, 30.0f, 40.0f, + 50.0f, 60.0f, 70.0f, 80.0f, + + 100.0f, 200.0f, 300.0f, 400.0f, + 500.0f, 600.0f, 700.0f, 800.0f }); + std::vector<int32_t> outputValues({ 0, 0, + 0, 0, + 0, 0 }); + + return ArgMinMaxTestCommon<ArmnnType>(workloadFactory, memoryManager, + armnn::ArgMinMaxFunction::Min, + inputTensorInfo, outputTensorInfo, + inputValues, outputValues, 3); +} + // Explicit template specializations @@ -197,22 +251,22 @@ ArgMaxSimpleTest<armnn::DataType::Float32>( const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMinSimpleTest<armnn::DataType::Float32>( +ArgMaxSimpleTest<armnn::DataType::QuantisedAsymm8>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMaxSimpleTest<armnn::DataType::QuantisedAsymm8>( +ArgMaxSimpleTest<armnn::DataType::QuantisedSymm16>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMinSimpleTest<armnn::DataType::QuantisedAsymm8>( +ArgMinSimpleTest<armnn::DataType::Float32>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMaxSimpleTest<armnn::DataType::QuantisedSymm16>( +ArgMinSimpleTest<armnn::DataType::QuantisedAsymm8>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); @@ -222,31 +276,41 @@ ArgMinSimpleTest<armnn::DataType::QuantisedSymm16>( const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMinChannel4dTest<armnn::DataType::Float32>( +ArgMinChannelTest<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +template LayerTestResult<int32_t, 3> +ArgMinChannelTest<armnn::DataType::QuantisedAsymm8>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +template LayerTestResult<int32_t, 3> +ArgMinChannelTest<armnn::DataType::QuantisedSymm16>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMinChannel4dTest<armnn::DataType::QuantisedAsymm8>( +ArgMaxChannelTest<armnn::DataType::Float32>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMinChannel4dTest<armnn::DataType::QuantisedSymm16>( +ArgMaxChannelTest<armnn::DataType::QuantisedAsymm8>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMaxChannel4dTest<armnn::DataType::Float32>( +ArgMaxChannelTest<armnn::DataType::QuantisedSymm16>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMaxChannel4dTest<armnn::DataType::QuantisedAsymm8>( +ArgMaxHeightTest<armnn::DataType::Float32>( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template LayerTestResult<int32_t, 3> -ArgMaxChannel4dTest<armnn::DataType::QuantisedSymm16>( +ArgMinWidthTest<armnn::DataType::Float32>( armnn::IWorkloadFactory& workloadFactory, - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
\ No newline at end of file + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); diff --git a/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.hpp index 79d77d41b2..b3bd7db703 100644 --- a/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.hpp +++ b/src/backends/backendsCommon/test/layerTests/ArgMinMaxTestImpl.hpp @@ -21,9 +21,17 @@ LayerTestResult<int32_t, 3> ArgMinSimpleTest(armnn::IWorkloadFactory& workloadFa const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> -LayerTestResult<int32_t, 3> ArgMinChannel4dTest(armnn::IWorkloadFactory& workloadFactory, - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +LayerTestResult<int32_t, 3> ArgMinChannelTest(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> -LayerTestResult<int32_t, 3> ArgMaxChannel4dTest(armnn::IWorkloadFactory& workloadFactory, - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
\ No newline at end of file +LayerTestResult<int32_t, 3> ArgMaxChannelTest(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<int32_t, 3> ArgMaxHeightTest(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<int32_t, 3> ArgMinWidthTest(armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
\ No newline at end of file diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index b713abaef3..2f3643f210 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -21,6 +21,7 @@ #include "workloads/NeonAbsWorkload.hpp" #include "workloads/NeonAdditionWorkload.hpp" #include "workloads/NeonActivationWorkload.hpp" +#include "workloads/NeonArgMinMaxWorkload.hpp" #include "workloads/NeonBatchNormalizationWorkload.hpp" #include "workloads/NeonConvolution2dWorkload.hpp" #include "workloads/NeonDepthToSpaceWorkload.hpp" @@ -146,6 +147,18 @@ bool NeonLayerSupport::IsAdditionSupported(const TensorInfo& input0, output); } +bool NeonLayerSupport::IsArgMinMaxSupported(const TensorInfo& input, + const TensorInfo& output, + const ArgMinMaxDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonArgMinMaxWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool NeonLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input, const TensorInfo& output, const TensorInfo& mean, diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp index 31f9e576d7..76eb342eb9 100644 --- a/src/backends/neon/NeonLayerSupport.hpp +++ b/src/backends/neon/NeonLayerSupport.hpp @@ -26,6 +26,11 @@ public: const TensorInfo& output, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsArgMinMaxSupported(const TensorInfo& input, + const TensorInfo& output, + const ArgMinMaxDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsBatchNormalizationSupported(const TensorInfo& input, const TensorInfo& output, const TensorInfo& mean, diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp index 37013eb92d..ca5bfb04b1 100644 --- a/src/backends/neon/NeonTensorHandle.hpp +++ b/src/backends/neon/NeonTensorHandle.hpp @@ -186,6 +186,10 @@ private: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast<int16_t*>(memory)); break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int32_t*>(memory)); + break; default: { throw armnn::UnimplementedException(); @@ -216,6 +220,10 @@ private: armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory), this->GetTensor()); break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory), + this->GetTensor()); + break; default: { throw armnn::UnimplementedException(); @@ -292,6 +300,10 @@ private: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast<int16_t*>(memory)); break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int32_t*>(memory)); + break; default: { throw armnn::UnimplementedException(); @@ -318,6 +330,10 @@ private: armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory), this->GetTensor()); break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory), + this->GetTensor()); + break; default: { throw armnn::UnimplementedException(); diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index f0b738c350..3492923f72 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -442,4 +442,10 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDesc return std::make_unique<NeonStackWorkload>(descriptor, info); } +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique<NeonArgMinMaxWorkload>(descriptor, info); +} + } // namespace armnn diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp index 4bdbc8e859..aad9cf949d 100644 --- a/src/backends/neon/NeonWorkloadFactory.hpp +++ b/src/backends/neon/NeonWorkloadFactory.hpp @@ -200,6 +200,9 @@ public: std::unique_ptr<IWorkload> CreateStack(const StackQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + std::unique_ptr<IWorkload> CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + private: mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager; }; diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 3e46387ae4..fb10a0dcef 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -24,6 +24,7 @@ BACKEND_SOURCES := \ workloads/NeonAbsWorkload.cpp \ workloads/NeonActivationWorkload.cpp \ workloads/NeonAdditionWorkload.cpp \ + workloads/NeonArgMinMaxWorkload.cpp \ workloads/NeonBatchNormalizationWorkload.cpp \ workloads/NeonConcatWorkload.cpp \ workloads/NeonConstantWorkload.cpp \ diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp index 38771833bd..0d1faa9fc5 100644 --- a/src/backends/neon/test/NeonLayerTests.cpp +++ b/src/backends/neon/test/NeonLayerTests.cpp @@ -910,6 +910,14 @@ ARMNN_AUTO_TEST_CASE(Rsqrt3d, Rsqrt3dTest<DataType::Float32>) ARMNN_AUTO_TEST_CASE(RsqrtZero, RsqrtZeroTest<DataType::Float32>) ARMNN_AUTO_TEST_CASE(RsqrtNegative, RsqrtNegativeTest<DataType::Float32>) +// ArgMinMax +ARMNN_AUTO_TEST_CASE(ArgMinFloat32, ArgMinSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMaxFloat32, ArgMaxSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMinChannel, ArgMinChannelTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMaxChannel, ArgMaxChannelTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMaxHeight, ArgMaxHeightTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMinWidth, ArgMinWidthTest<DataType::Float32>) + #if defined(ARMNNREF_ENABLED) // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt index 42ac641369..f8d5922b70 100644 --- a/src/backends/neon/workloads/CMakeLists.txt +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -10,6 +10,8 @@ list(APPEND armnnNeonBackendWorkloads_sources NeonActivationWorkload.hpp NeonAdditionWorkload.cpp NeonAdditionWorkload.hpp + NeonArgMinMaxWorkload.cpp + NeonArgMinMaxWorkload.hpp NeonBatchNormalizationWorkload.cpp NeonBatchNormalizationWorkload.hpp NeonConcatWorkload.cpp diff --git a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp new file mode 100644 index 0000000000..e8d537f2ef --- /dev/null +++ b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp @@ -0,0 +1,79 @@ +// +// Copyright © 2019 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonArgMinMaxWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <aclCommon/ArmComputeTensorUtils.hpp> +#include <backendsCommon/CpuTensorHandle.hpp> +#include <TensorUtils.hpp> + +#include <arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h> + +namespace +{ +unsigned int CalcAclAxis(unsigned int numDimensions, unsigned int axisIndex) +{ + return (numDimensions - axisIndex) - 1; +} + +} //namespace + +namespace armnn +{ + +arm_compute::Status NeonArgMinMaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ArgMinMaxDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + auto numDims = input.GetNumDimensions(); + auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis); + int aclAxis = boost::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis)); + + if (descriptor.m_Function == ArgMinMaxFunction::Max) + { + return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput, + arm_compute::ReductionOperation::ARG_IDX_MAX); + } + else + { + return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput, + arm_compute::ReductionOperation::ARG_IDX_MIN); + } +} + + +NeonArgMinMaxWorkload::NeonArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, + const WorkloadInfo& info) + : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info) +{ + arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + auto numDims = info.m_InputTensorInfos[0].GetNumDimensions(); + auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, m_Data.m_Parameters.m_Axis); + int aclAxis = boost::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis)); + + if (m_Data.m_Parameters.m_Function == ArgMinMaxFunction::Max) + { + m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MAX); + } + else + { + m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MIN); + } +} + +void NeonArgMinMaxWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonArgMinMaxWorkload_Execute"); + m_ArgMinMaxLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonArgMinMaxWorkload.hpp b/src/backends/neon/workloads/NeonArgMinMaxWorkload.hpp new file mode 100644 index 0000000000..6301b13718 --- /dev/null +++ b/src/backends/neon/workloads/NeonArgMinMaxWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2019 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h> +namespace armnn +{ + +arm_compute::Status NeonArgMinMaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ArgMinMaxDescriptor& descriptor); + +class NeonArgMinMaxWorkload : public BaseWorkload<ArgMinMaxQueueDescriptor> +{ +public: + NeonArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArgMinMaxLayer m_ArgMinMaxLayer; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp index 9d35ed42fe..8044a4f004 100644 --- a/src/backends/neon/workloads/NeonWorkloads.hpp +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -7,6 +7,7 @@ #include "NeonAbsWorkload.hpp" #include "NeonActivationWorkload.hpp" #include "NeonAdditionWorkload.hpp" +#include "NeonArgMinMaxWorkload.hpp" #include "NeonBatchNormalizationWorkload.hpp" #include "NeonConstantWorkload.hpp" #include "NeonConvertFp16ToFp32Workload.hpp" diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp index 901017ad99..0058e15a8e 100644 --- a/src/backends/reference/test/RefLayerTests.cpp +++ b/src/backends/reference/test/RefLayerTests.cpp @@ -1025,18 +1025,20 @@ ARMNN_AUTO_TEST_CASE(AdditionAfterMaxPool, AdditionAfterMaxPoolTest) // ArgMinMax ARMNN_AUTO_TEST_CASE(ArgMaxFloat32, ArgMaxSimpleTest<DataType::Float32>) ARMNN_AUTO_TEST_CASE(ArgMinFloat32, ArgMinSimpleTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(ArgMinChannel4dFloat32, ArgMinChannel4dTest<DataType::Float32>) -ARMNN_AUTO_TEST_CASE(ArgMaxChannel4dFloat32, ArgMaxChannel4dTest<DataType::Float32>) - -ARMNN_AUTO_TEST_CASE(ArgMaxQuantisedAsymm8, ArgMaxSimpleTest<DataType::QuantisedAsymm8>) -ARMNN_AUTO_TEST_CASE(ArgMinQuantisedAsymm8, ArgMinSimpleTest<DataType::QuantisedAsymm8>) -ARMNN_AUTO_TEST_CASE(ArgMinChannel4dQuantisedAsymm8, ArgMinChannel4dTest<DataType::QuantisedAsymm8>) -ARMNN_AUTO_TEST_CASE(ArgMaxChannel4dQuantisedAsymm8, ArgMaxChannel4dTest<DataType::QuantisedAsymm8>) - -ARMNN_AUTO_TEST_CASE(ArgMaxQuantisedSymm16, ArgMaxSimpleTest<DataType::QuantisedSymm16>) -ARMNN_AUTO_TEST_CASE(ArgMinQuantisedSymm16, ArgMinSimpleTest<DataType::QuantisedSymm16>) -ARMNN_AUTO_TEST_CASE(ArgMinChannel4dQuantisedSymm16, ArgMinChannel4dTest<DataType::QuantisedSymm16>) -ARMNN_AUTO_TEST_CASE(ArgMaxChannel4dQuantisedSymm16, ArgMaxChannel4dTest<DataType::QuantisedSymm16>) +ARMNN_AUTO_TEST_CASE(ArgMinChannelFloat32, ArgMinChannelTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMaxChannelFloat32, ArgMaxChannelTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMaxHeightFloat32, ArgMaxHeightTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE(ArgMinWidthFloat32, ArgMinWidthTest<DataType::Float32>) + +ARMNN_AUTO_TEST_CASE(ArgMaxSimpleQuantisedAsymm8, ArgMaxSimpleTest<DataType::QuantisedAsymm8>) +ARMNN_AUTO_TEST_CASE(ArgMinSimpleQuantisedAsymm8, ArgMinSimpleTest<DataType::QuantisedAsymm8>) +ARMNN_AUTO_TEST_CASE(ArgMinChannelQuantisedAsymm8, ArgMinChannelTest<DataType::QuantisedAsymm8>) +ARMNN_AUTO_TEST_CASE(ArgMaxChannelQuantisedAsymm8, ArgMaxChannelTest<DataType::QuantisedAsymm8>) + +ARMNN_AUTO_TEST_CASE(ArgMaxSimpleQuantisedSymm16, ArgMaxSimpleTest<DataType::QuantisedSymm16>) +ARMNN_AUTO_TEST_CASE(ArgMinSimpleQuantisedSymm16, ArgMinSimpleTest<DataType::QuantisedSymm16>) +ARMNN_AUTO_TEST_CASE(ArgMinChannelQuantisedSymm16, ArgMinChannelTest<DataType::QuantisedSymm16>) +ARMNN_AUTO_TEST_CASE(ArgMaxChannelQuantisedSymm16, ArgMaxChannelTest<DataType::QuantisedSymm16>) // Space To Batch Nd ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleFloat32, SpaceToBatchNdSimpleFloat32Test) |