From 6a5e5e8b7e56f927d70ced3203d6e16df3fdd189 Mon Sep 17 00:00:00 2001 From: Derek Lamberti Date: Thu, 5 Dec 2019 14:41:20 +0000 Subject: IVGCVSW-4227 Add CpuAcc backend support for DetectionPostProcess Change-Id: I318bf92b8d1db593d9c30b9b4412bfecbe65bc12 Signed-off-by: Derek Lamberti --- include/armnn/ILayerSupport.hpp | 14 +- src/backends/backendsCommon/LayerSupportBase.cpp | 13 +- src/backends/backendsCommon/LayerSupportBase.hpp | 9 +- src/backends/backendsCommon/WorkloadFactory.cpp | 20 ++- .../test/IsLayerSupportedTestImpl.hpp | 18 +++ src/backends/neon/NeonLayerSupport.cpp | 24 +++ src/backends/neon/NeonLayerSupport.hpp | 10 ++ src/backends/neon/NeonWorkloadFactory.cpp | 2 +- src/backends/neon/backend.mk | 1 + src/backends/neon/test/NeonEndToEndTests.cpp | 165 +++++++++++++++++++++ src/backends/neon/workloads/CMakeLists.txt | 2 + .../workloads/NeonDetectionPostProcessWorkload.cpp | 112 ++++++++++++++ .../workloads/NeonDetectionPostProcessWorkload.hpp | 40 +++++ src/backends/neon/workloads/NeonWorkloads.hpp | 1 + src/backends/reference/RefLayerSupport.cpp | 17 ++- src/backends/reference/RefLayerSupport.hpp | 9 +- 16 files changed, 433 insertions(+), 24 deletions(-) create mode 100644 src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp create mode 100644 src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp index 54f4a2883b..a2d3961d34 100644 --- a/include/armnn/ILayerSupport.hpp +++ b/include/armnn/ILayerSupport.hpp @@ -111,11 +111,15 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()) const = 0; - virtual bool IsDetectionPostProcessSupported( - const TensorInfo& input0, - const TensorInfo& input1, - const DetectionPostProcessDescriptor& descriptor, - Optional reasonIfUnsupported = EmptyOptional()) const = 0; + virtual bool IsDetectionPostProcessSupported(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const 
TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()) const = 0; virtual bool IsDilatedDepthwiseConvolutionSupported( const TensorInfo& input, diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp index 55261b83cf..00f1d0223d 100644 --- a/src/backends/backendsCommon/LayerSupportBase.cpp +++ b/src/backends/backendsCommon/LayerSupportBase.cpp @@ -163,10 +163,15 @@ bool LayerSupportBase::IsDequantizeSupported(const TensorInfo& input, return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); } -bool LayerSupportBase::IsDetectionPostProcessSupported(const armnn::TensorInfo& input0, - const armnn::TensorInfo& input1, - const armnn::DetectionPostProcessDescriptor& descriptor, - armnn::Optional reasonIfUnsupported) const +bool LayerSupportBase::IsDetectionPostProcessSupported(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor& descriptor, + Optional reasonIfUnsupported) const { return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); } diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp index e99cb67614..60f94d0c4d 100644 --- a/src/backends/backendsCommon/LayerSupportBase.hpp +++ b/src/backends/backendsCommon/LayerSupportBase.hpp @@ -96,8 +96,13 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()) const override; - bool IsDetectionPostProcessSupported(const TensorInfo& input0, - const TensorInfo& input1, + bool IsDetectionPostProcessSupported(const
TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, const DetectionPostProcessDescriptor& descriptor, Optional reasonIfUnsupported = EmptyOptional()) const override; diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index 1d4ed7e159..805ec7ba5f 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -272,12 +272,24 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, } case LayerType::DetectionPostProcess: { - const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); - const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& boxEncodings = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& scores = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& anchors = cLayer->m_Anchors->GetTensorInfo(); + + const TensorInfo& detectionBoxes = layer.GetOutputSlot(0).GetTensorInfo(); + const TensorInfo& detectionClasses = layer.GetOutputSlot(1).GetTensorInfo(); + const TensorInfo& detectionScores = layer.GetOutputSlot(2).GetTensorInfo(); + const TensorInfo& numDetections = layer.GetOutputSlot(3).GetTensorInfo(); + const DetectionPostProcessDescriptor& descriptor = cLayer->GetParameters(); - result = layerSupportObject->IsDetectionPostProcessSupported(input0, - input1, + result = layerSupportObject->IsDetectionPostProcessSupported(boxEncodings, + scores, + anchors, + detectionBoxes, + detectionClasses, + detectionScores, + numDetections, descriptor, reason); break; diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp 
b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp index 12d7143122..7ab5ee4ec4 100644 --- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp +++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp @@ -238,6 +238,24 @@ struct DummyLayer { }; +template<> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(armnn::DetectionPostProcessDescriptor(), ""); + m_Layer->m_Anchors = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + + armnn::DetectionPostProcessLayer* m_Layer; +}; + template struct DummyLstmLayer { diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index 20b655098d..3fc323393e 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -28,6 +28,7 @@ #include "workloads/NeonDepthToSpaceWorkload.hpp" #include "workloads/NeonDepthwiseConvolutionWorkload.hpp" #include "workloads/NeonDequantizeWorkload.hpp" +#include "workloads/NeonDetectionPostProcessWorkload.hpp" #include "workloads/NeonGreaterWorkload.hpp" #include "workloads/NeonInstanceNormalizationWorkload.hpp" #include "workloads/NeonL2NormalizationFloatWorkload.hpp" @@ -339,6 +340,29 @@ bool NeonLayerSupport::IsDequantizeSupported(const TensorInfo& input, output); } +bool NeonLayerSupport::IsDetectionPostProcessSupported(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor& descriptor, + Optional reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDetectionPostProcessValidate, + reasonIfUnsupported, + boxEncodings, + scores, + anchors, + detectionBoxes, + detectionClasses, + detectionScores, + numDetections, + descriptor); +} 
+ + bool NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input, const TensorInfo& output, const DepthwiseConvolution2dDescriptor& descriptor, diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp index 5d4fbad97f..8e6cd6aded 100644 --- a/src/backends/neon/NeonLayerSupport.hpp +++ b/src/backends/neon/NeonLayerSupport.hpp @@ -86,6 +86,16 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsDetectionPostProcessSupported(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input, const TensorInfo& output, const DepthwiseConvolution2dDescriptor& descriptor, diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index dd11af4484..8d798ec864 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -205,7 +205,7 @@ std::unique_ptr NeonWorkloadFactory::CreateDequantize(const Dequantiz std::unique_ptr NeonWorkloadFactory::CreateDetectionPostProcess( const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const { - return MakeWorkloadHelper(descriptor, info); + return std::make_unique(descriptor, info); } std::unique_ptr NeonWorkloadFactory::CreateDivision( diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 23289345ea..3ddc79ada3 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -34,6 +34,7 @@ BACKEND_SOURCES := \ workloads/NeonDepthToSpaceWorkload.cpp \ workloads/NeonDepthwiseConvolutionWorkload.cpp \ 
workloads/NeonDequantizeWorkload.cpp \ + workloads/NeonDetectionPostProcessWorkload.cpp \ workloads/NeonFloorFloatWorkload.cpp \ workloads/NeonFullyConnectedWorkload.cpp \ workloads/NeonGreaterWorkload.cpp \ diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp index e841821b57..2f4c847971 100644 --- a/src/backends/neon/test/NeonEndToEndTests.cpp +++ b/src/backends/neon/test/NeonEndToEndTests.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -504,4 +505,168 @@ BOOST_AUTO_TEST_CASE(NeonArgMinAxis3TestQuantisedAsymm8) ArgMinAxis3EndToEnd(defaultBackends); } +BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessRegularNmsTest) +{ + std::vector boxEncodings({ + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f + }); + std::vector scores({ + 0.0f, 0.9f, 0.8f, + 0.0f, 0.75f, 0.72f, + 0.0f, 0.6f, 0.5f, + 0.0f, 0.93f, 0.95f, + 0.0f, 0.5f, 0.4f, + 0.0f, 0.3f, 0.2f + }); + std::vector anchors({ + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 100.5f, 1.0f, 1.0f + }); + DetectionPostProcessRegularNmsEndToEnd(defaultBackends, boxEncodings, scores, anchors); +} + +inline void QuantizeData(uint8_t* quant, const float* dequant, const TensorInfo& info) +{ + for (size_t i = 0; i < info.GetNumElements(); i++) + { + quant[i] = armnn::Quantize(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + } +} + +BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessRegularNmsUint8Test) +{ + armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32); + armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::Float32); + armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32); + + boxEncodingsInfo.SetQuantizationScale(1.0f); + boxEncodingsInfo.SetQuantizationOffset(1); + 
scoresInfo.SetQuantizationScale(0.01f); + scoresInfo.SetQuantizationOffset(0); + anchorsInfo.SetQuantizationScale(0.5f); + anchorsInfo.SetQuantizationOffset(0); + + std::vector boxEncodings({ + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f + }); + std::vector scores({ + 0.0f, 0.9f, 0.8f, + 0.0f, 0.75f, 0.72f, + 0.0f, 0.6f, 0.5f, + 0.0f, 0.93f, 0.95f, + 0.0f, 0.5f, 0.4f, + 0.0f, 0.3f, 0.2f + }); + std::vector anchors({ + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 100.5f, 1.0f, 1.0f + }); + + std::vector qBoxEncodings(boxEncodings.size(), 0); + std::vector qScores(scores.size(), 0); + std::vector qAnchors(anchors.size(), 0); + QuantizeData(qBoxEncodings.data(), boxEncodings.data(), boxEncodingsInfo); + QuantizeData(qScores.data(), scores.data(), scoresInfo); + QuantizeData(qAnchors.data(), anchors.data(), anchorsInfo); + DetectionPostProcessRegularNmsEndToEnd(defaultBackends, qBoxEncodings, + qScores, qAnchors, + 1.0f, 1, 0.01f, 0, 0.5f, 0); +} + +BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessFastNmsTest) +{ + std::vector boxEncodings({ + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f + }); + std::vector scores({ + 0.0f, 0.9f, 0.8f, + 0.0f, 0.75f, 0.72f, + 0.0f, 0.6f, 0.5f, + 0.0f, 0.93f, 0.95f, + 0.0f, 0.5f, 0.4f, + 0.0f, 0.3f, 0.2f + }); + std::vector anchors({ + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 100.5f, 1.0f, 1.0f + }); + DetectionPostProcessFastNmsEndToEnd(defaultBackends, boxEncodings, scores, anchors); +} + +BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessFastNmsUint8Test) +{ + armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32); +
armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::Float32); + armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32); + + boxEncodingsInfo.SetQuantizationScale(1.0f); + boxEncodingsInfo.SetQuantizationOffset(1); + scoresInfo.SetQuantizationScale(0.01f); + scoresInfo.SetQuantizationOffset(0); + anchorsInfo.SetQuantizationScale(0.5f); + anchorsInfo.SetQuantizationOffset(0); + + std::vector boxEncodings({ + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f + }); + std::vector scores({ + 0.0f, 0.9f, 0.8f, + 0.0f, 0.75f, 0.72f, + 0.0f, 0.6f, 0.5f, + 0.0f, 0.93f, 0.95f, + 0.0f, 0.5f, 0.4f, + 0.0f, 0.3f, 0.2f + }); + std::vector anchors({ + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 100.5f, 1.0f, 1.0f + }); + + std::vector qBoxEncodings(boxEncodings.size(), 0); + std::vector qScores(scores.size(), 0); + std::vector qAnchors(anchors.size(), 0); + QuantizeData(qBoxEncodings.data(), boxEncodings.data(), boxEncodingsInfo); + QuantizeData(qScores.data(), scores.data(), scoresInfo); + QuantizeData(qAnchors.data(), anchors.data(), anchorsInfo); + DetectionPostProcessFastNmsEndToEnd(defaultBackends, qBoxEncodings, + qScores, qAnchors, + 1.0f, 1, 0.01f, 0, 0.5f, 0); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt index cf3789e5ac..44db6d263a 100644 --- a/src/backends/neon/workloads/CMakeLists.txt +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -30,6 +30,8 @@ list(APPEND armnnNeonBackendWorkloads_sources NeonDepthwiseConvolutionWorkload.hpp NeonDequantizeWorkload.cpp NeonDequantizeWorkload.hpp + NeonDetectionPostProcessWorkload.cpp + NeonDetectionPostProcessWorkload.hpp NeonFloorFloatWorkload.cpp NeonFloorFloatWorkload.hpp NeonFullyConnectedWorkload.cpp diff --git 
a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp new file mode 100644 index 0000000000..481e950953 --- /dev/null +++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp @@ -0,0 +1,112 @@ +// +// Copyright © 2019 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDetectionPostProcessWorkload.hpp" + +#include "NeonWorkloadUtils.hpp" + +#include +#include + +#include + +namespace armnn +{ + +arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& desc) +{ + return arm_compute::DetectionPostProcessLayerInfo(desc.m_MaxDetections, + desc.m_MaxClassesPerDetection, + desc.m_NmsScoreThreshold, + desc.m_NmsIouThreshold, + desc.m_NumClasses, + { desc.m_ScaleX, + desc.m_ScaleY, + desc.m_ScaleW, + desc.m_ScaleH }, + desc.m_UseRegularNms, + desc.m_DetectionsPerClass); +} + +arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor &desc) +{ + arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(desc); + + const arm_compute::TensorInfo aclBoxEncodings = + armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings); + + const arm_compute::TensorInfo aclScores = + armcomputetensorutils::BuildArmComputeTensorInfo(scores); + + const arm_compute::TensorInfo aclAnchors = + armcomputetensorutils::BuildArmComputeTensorInfo(anchors); + + arm_compute::TensorInfo aclDetectionBoxes = + armcomputetensorutils::BuildArmComputeTensorInfo(detectionBoxes); + + arm_compute::TensorInfo aclDetectionClasses = + armcomputetensorutils::BuildArmComputeTensorInfo(detectionClasses); + + arm_compute::TensorInfo aclDetectionScores = + 
armcomputetensorutils::BuildArmComputeTensorInfo(detectionScores); + + arm_compute::TensorInfo aclNumDetections = + armcomputetensorutils::BuildArmComputeTensorInfo(numDetections); + + return arm_compute::CPPDetectionPostProcessLayer::validate( + &aclBoxEncodings, + &aclScores, + &aclAnchors, + &aclDetectionBoxes, + &aclDetectionClasses, + &aclDetectionScores, + &aclNumDetections, + info); +} + +NeonDetectionPostProcessWorkload::NeonDetectionPostProcessWorkload( + const DetectionPostProcessQueueDescriptor& descriptor, + const WorkloadInfo& info) + : BaseWorkload(descriptor, info) +{ + m_Anchors = std::make_unique(); + BuildArmComputeTensor(*m_Anchors, descriptor.m_Anchors->GetTensorInfo()); + + arm_compute::DetectionPostProcessLayerInfo di = MakeInfo(m_Data.m_Parameters); + + auto AclTensorRef = [](ITensorHandle* tensor) -> arm_compute::ITensor& + { + return boost::polymorphic_downcast(tensor)->GetTensor(); + }; + + arm_compute::ITensor& boxEncodings = AclTensorRef(m_Data.m_Inputs[0]); + arm_compute::ITensor& scores = AclTensorRef(m_Data.m_Inputs[1]); + + arm_compute::ITensor& detectionBoxes = AclTensorRef(m_Data.m_Outputs[0]); + arm_compute::ITensor& detectionClasses = AclTensorRef(m_Data.m_Outputs[1]); + arm_compute::ITensor& detectionScores = AclTensorRef(m_Data.m_Outputs[2]); + arm_compute::ITensor& numDetections = AclTensorRef(m_Data.m_Outputs[3]); + + m_Func.configure(&boxEncodings, &scores, m_Anchors.get(), + &detectionBoxes, &detectionClasses, &detectionScores, &numDetections, + di); + + InitializeArmComputeTensorData(*m_Anchors, m_Data.m_Anchors); +} + +void NeonDetectionPostProcessWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDetectionPostProcessWorkload_Execute"); + m_Func.run(); +} + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp new file mode 100644 index 0000000000..f7f0ebffb9 --- /dev/null +++ 
b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2019 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include + +namespace armnn +{ + +arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor &desc); + +class NeonDetectionPostProcessWorkload : public BaseWorkload +{ +public: + NeonDetectionPostProcessWorkload( + const DetectionPostProcessQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::CPPDetectionPostProcessLayer m_Func; + + std::unique_ptr m_Anchors; + +}; + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp index 77d819e3ab..dc9bef31b2 100644 --- a/src/backends/neon/workloads/NeonWorkloads.hpp +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -16,6 +16,7 @@ #include "NeonDepthToSpaceWorkload.hpp" #include "NeonDepthwiseConvolutionWorkload.hpp" #include "NeonDequantizeWorkload.hpp" +#include "NeonDetectionPostProcessWorkload.hpp" #include "NeonFloorFloatWorkload.hpp" #include "NeonFullyConnectedWorkload.hpp" #include "NeonGreaterWorkload.hpp" diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp index 30ce5c69b5..299503ddc6 100644 --- a/src/backends/reference/RefLayerSupport.cpp +++ b/src/backends/reference/RefLayerSupport.cpp @@ -637,10 +637,15 @@ bool RefLayerSupport::IsDequantizeSupported(const TensorInfo& input, return supported; } -bool RefLayerSupport::IsDetectionPostProcessSupported(const armnn::TensorInfo& input0, - const 
armnn::TensorInfo& input1, - const armnn::DetectionPostProcessDescriptor& descriptor, - armnn::Optional reasonIfUnsupported) const +bool RefLayerSupport::IsDetectionPostProcessSupported(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor& descriptor, + Optional reasonIfUnsupported) const { bool supported = true; @@ -651,10 +656,10 @@ bool RefLayerSupport::IsDetectionPostProcessSupported(const armnn::TensorInfo& i DataType::QuantisedSymm16 }; - supported &= CheckSupportRule(TypeAnyOf(input0, supportedInputTypes), reasonIfUnsupported, + supported &= CheckSupportRule(TypeAnyOf(boxEncodings, supportedInputTypes), reasonIfUnsupported, "Reference DetectionPostProcess: input 0 is not a supported type."); - supported &= CheckSupportRule(TypeAnyOf(input1, supportedInputTypes), reasonIfUnsupported, + supported &= CheckSupportRule(TypeAnyOf(scores, supportedInputTypes), reasonIfUnsupported, "Reference DetectionPostProcess: input 1 is not a supported type."); return supported; diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp index 04b355ee0a..a7d6303d86 100644 --- a/src/backends/reference/RefLayerSupport.hpp +++ b/src/backends/reference/RefLayerSupport.hpp @@ -94,8 +94,13 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()) const override; - bool IsDetectionPostProcessSupported(const TensorInfo& input0, - const TensorInfo& input1, + bool IsDetectionPostProcessSupported(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, const DetectionPostProcessDescriptor& descriptor, Optional reasonIfUnsupported 
= EmptyOptional()) const override; -- cgit v1.2.1