aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Derek Lamberti <derek.lamberti@arm.com> 2019-12-05 14:41:20 +0000
committer Derek Lamberti <derek.lamberti@arm.com> 2019-12-09 14:22:15 +0000
commit 6a5e5e8b7e56f927d70ced3203d6e16df3fdd189 (patch)
tree d961fa1349c6a51a595e51c29c1df42419a7a426
parent e5b8eb9fe8147a0849db08ef0898a0e8bef920b4 (diff)
download armnn-6a5e5e8b7e56f927d70ced3203d6e16df3fdd189.tar.gz
IVGCVSW-4227 Add CpuAcc backend support for DetectionPostProcess
Change-Id: I318bf92b8d1db593d9c30b9b4412bfecbe65bc12
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
-rw-r--r-- include/armnn/ILayerSupport.hpp 14
-rw-r--r-- src/backends/backendsCommon/LayerSupportBase.cpp 13
-rw-r--r-- src/backends/backendsCommon/LayerSupportBase.hpp 9
-rw-r--r-- src/backends/backendsCommon/WorkloadFactory.cpp 20
-rw-r--r-- src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp 18
-rw-r--r-- src/backends/neon/NeonLayerSupport.cpp 24
-rw-r--r-- src/backends/neon/NeonLayerSupport.hpp 10
-rw-r--r-- src/backends/neon/NeonWorkloadFactory.cpp 2
-rw-r--r-- src/backends/neon/backend.mk 1
-rw-r--r-- src/backends/neon/test/NeonEndToEndTests.cpp 165
-rw-r--r-- src/backends/neon/workloads/CMakeLists.txt 2
-rw-r--r-- src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp 112
-rw-r--r-- src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp 40
-rw-r--r-- src/backends/neon/workloads/NeonWorkloads.hpp 1
-rw-r--r-- src/backends/reference/RefLayerSupport.cpp 17
-rw-r--r-- src/backends/reference/RefLayerSupport.hpp 9
16 files changed, 433 insertions, 24 deletions
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index 54f4a2883b..a2d3961d34 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -111,11 +111,15 @@ public:
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
- virtual bool IsDetectionPostProcessSupported(
- const TensorInfo& input0,
- const TensorInfo& input1,
- const DetectionPostProcessDescriptor& descriptor,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+    // Reports whether this backend can execute a DetectionPostProcess layer.
+    // boxEncodings and scores describe the layer's two inputs, anchors describes its constant anchors
+    // tensor, and detectionBoxes, detectionClasses, detectionScores and numDetections describe its four
+    // outputs. reasonIfUnsupported, when provided, may be filled in by the backend with an explanation.
+    // The parameter list is wrapped like the neighbouring declarations so that "= 0" keeps its usual spacing.
+    virtual bool IsDetectionPostProcessSupported(
+        const TensorInfo& boxEncodings,
+        const TensorInfo& scores,
+        const TensorInfo& anchors,
+        const TensorInfo& detectionBoxes,
+        const TensorInfo& detectionClasses,
+        const TensorInfo& detectionScores,
+        const TensorInfo& numDetections,
+        const DetectionPostProcessDescriptor& descriptor,
+        Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
virtual bool IsDilatedDepthwiseConvolutionSupported(
const TensorInfo& input,
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 55261b83cf..00f1d0223d 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -163,10 +163,15 @@ bool LayerSupportBase::IsDequantizeSupported(const TensorInfo& input,
return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
}
-bool LayerSupportBase::IsDetectionPostProcessSupported(const armnn::TensorInfo& input0,
- const armnn::TensorInfo& input1,
- const armnn::DetectionPostProcessDescriptor& descriptor,
- armnn::Optional<std::string&> reasonIfUnsupported) const
+// Base-class implementation of the DetectionPostProcess support query.
+// All tensor infos and the descriptor are ignored; the result is whatever DefaultLayerSupport()
+// returns for this function name, file and line, and reasonIfUnsupported is passed straight through to it.
+bool LayerSupportBase::IsDetectionPostProcessSupported(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
+ const DetectionPostProcessDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
{
return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
}
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index e99cb67614..60f94d0c4d 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -96,8 +96,13 @@ public:
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
- bool IsDetectionPostProcessSupported(const TensorInfo& input0,
- const TensorInfo& input1,
+ // DetectionPostProcess support query: two input infos (boxEncodings, scores), the anchors info,
+ // four output infos (detectionBoxes, detectionClasses, detectionScores, numDetections) and the descriptor.
+ bool IsDetectionPostProcessSupported(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
const DetectionPostProcessDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 1d4ed7e159..805ec7ba5f 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -272,12 +272,24 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
}
case LayerType::DetectionPostProcess:
{
- const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
- const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
auto cLayer = boost::polymorphic_downcast<const DetectionPostProcessLayer*>(&layer);
+ const TensorInfo& boxEncodings = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& scores = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
+ const TensorInfo& anchors = cLayer->m_Anchors->GetTensorInfo();
+
+ const TensorInfo& detectionBoxes = layer.GetOutputSlot(0).GetTensorInfo();
+ const TensorInfo& detectionClasses = layer.GetOutputSlot(1).GetTensorInfo();
+ const TensorInfo& detectionScores = layer.GetOutputSlot(2).GetTensorInfo();
+ const TensorInfo& numDetections = layer.GetOutputSlot(3).GetTensorInfo();
+
const DetectionPostProcessDescriptor& descriptor = cLayer->GetParameters();
- result = layerSupportObject->IsDetectionPostProcessSupported(input0,
- input1,
+ result = layerSupportObject->IsDetectionPostProcessSupported(boxEncodings,
+ scores,
+ anchors,
+ detectionBoxes,
+ detectionClasses,
+ detectionScores,
+ numDetections,
descriptor,
reason);
break;
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 12d7143122..7ab5ee4ec4 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -238,6 +238,24 @@ struct DummyLayer<armnn::TransposeConvolution2dLayer>
{
};
+// DummyLayer specialisation for DetectionPostProcessLayer.
+// Unlike a plain layer, this one also needs its m_Anchors handle populated: the layer-support query
+// added in this change dereferences cLayer->m_Anchors to obtain the anchors TensorInfo.
+template<>
+struct DummyLayer<armnn::DetectionPostProcessLayer>
+{
+ // Adds the layer to dummyGraph with a default descriptor and an empty name, then attaches an
+ // anchors handle described as a 1x1x1x1 Float32 tensor.
+ DummyLayer()
+ {
+ m_Layer = dummyGraph.AddLayer<armnn::DetectionPostProcessLayer>(armnn::DetectionPostProcessDescriptor(), "");
+ m_Layer->m_Anchors = std::make_unique<armnn::ScopedCpuTensorHandle>(
+ armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32));
+ }
+
+ // Removes the layer from dummyGraph again.
+ ~DummyLayer()
+ {
+ dummyGraph.EraseLayer(m_Layer);
+ }
+
+ // Pointer to the layer added to dummyGraph by the constructor and erased by the destructor.
+ armnn::DetectionPostProcessLayer* m_Layer;
+};
+
template <typename LstmLayerType>
struct DummyLstmLayer
{
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 20b655098d..3fc323393e 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -28,6 +28,7 @@
#include "workloads/NeonDepthToSpaceWorkload.hpp"
#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
#include "workloads/NeonDequantizeWorkload.hpp"
+#include "workloads/NeonDetectionPostProcessWorkload.hpp"
#include "workloads/NeonGreaterWorkload.hpp"
#include "workloads/NeonInstanceNormalizationWorkload.hpp"
#include "workloads/NeonL2NormalizationFloatWorkload.hpp"
@@ -339,6 +340,29 @@ bool NeonLayerSupport::IsDequantizeSupported(const TensorInfo& input,
output);
}
+// CpuAcc (Neon) support query for DetectionPostProcess.
+// Hands every tensor info and the descriptor to NeonDetectionPostProcessValidate through
+// FORWARD_WORKLOAD_VALIDATE_FUNC; presumably the macro turns the returned arm_compute::Status into the
+// bool result and fills reasonIfUnsupported on failure -- TODO confirm against the macro definition.
+bool NeonLayerSupport::IsDetectionPostProcessSupported(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
+ const DetectionPostProcessDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDetectionPostProcessValidate,
+ reasonIfUnsupported,
+ boxEncodings,
+ scores,
+ anchors,
+ detectionBoxes,
+ detectionClasses,
+ detectionScores,
+ numDetections,
+ descriptor);
+}
+
+
bool NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input,
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index 5d4fbad97f..8e6cd6aded 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -86,6 +86,16 @@ public:
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ // DetectionPostProcess support query: two input infos (boxEncodings, scores), the anchors info,
+ // four output infos (detectionBoxes, detectionClasses, detectionScores, numDetections) and the descriptor.
+ bool IsDetectionPostProcessSupported(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
+ const DetectionPostProcessDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input,
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index dd11af4484..8d798ec864 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -205,7 +205,7 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDequantize(const Dequantiz
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDetectionPostProcess(
const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const
{
- return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
+ // Construct the dedicated Neon workload directly instead of going through
+ // MakeWorkloadHelper<NullWorkload, NullWorkload>.
+ return std::make_unique<NeonDetectionPostProcessWorkload>(descriptor, info);
}
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 23289345ea..3ddc79ada3 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -34,6 +34,7 @@ BACKEND_SOURCES := \
workloads/NeonDepthToSpaceWorkload.cpp \
workloads/NeonDepthwiseConvolutionWorkload.cpp \
workloads/NeonDequantizeWorkload.cpp \
+ workloads/NeonDetectionPostProcessWorkload.cpp \
workloads/NeonFloorFloatWorkload.cpp \
workloads/NeonFullyConnectedWorkload.cpp \
workloads/NeonGreaterWorkload.cpp \
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
index e841821b57..2f4c847971 100644
--- a/src/backends/neon/test/NeonEndToEndTests.cpp
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -11,6 +11,7 @@
#include <backendsCommon/test/ConcatEndToEndTestImpl.hpp>
#include <backendsCommon/test/DepthToSpaceEndToEndTestImpl.hpp>
#include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
+#include <backendsCommon/test/DetectionPostProcessEndToEndTestImpl.hpp>
#include <backendsCommon/test/InstanceNormalizationEndToEndTestImpl.hpp>
#include <backendsCommon/test/PreluEndToEndTestImpl.hpp>
#include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp>
@@ -504,4 +505,168 @@ BOOST_AUTO_TEST_CASE(NeonArgMinAxis3TestQuantisedAsymm8)
ArgMinAxis3EndToEnd<armnn::DataType::QuantisedAsymm8>(defaultBackends);
}
+// Float32 end-to-end test: feeds fixed box encodings, scores and anchors to
+// DetectionPostProcessRegularNmsEndToEnd on defaultBackends.
+BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessRegularNmsTest)
+{
+ // Six rows of four values each.
+ std::vector<float> boxEncodings({
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f
+ });
+ // Six rows of three values each.
+ std::vector<float> scores({
+ 0.0f, 0.9f, 0.8f,
+ 0.0f, 0.75f, 0.72f,
+ 0.0f, 0.6f, 0.5f,
+ 0.0f, 0.93f, 0.95f,
+ 0.0f, 0.5f, 0.4f,
+ 0.0f, 0.3f, 0.2f
+ });
+ // Six rows of four values each.
+ std::vector<float> anchors({
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 100.5f, 1.0f, 1.0f
+ });
+ DetectionPostProcessRegularNmsEndToEnd<armnn::DataType::Float32>(defaultBackends, boxEncodings, scores, anchors);
+}
+
+// Quantizes info.GetNumElements() floats from dequant into quant, using the quantization scale and
+// offset stored in info. Both buffers must hold at least that many elements; no bounds are checked here.
+inline void QuantizeData(uint8_t* quant, const float* dequant, const TensorInfo& info)
+{
+ for (size_t i = 0; i < info.GetNumElements(); i++)
+ {
+ quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
+ }
+}
+
+// QuantisedAsymm8 end-to-end test: the float reference data is quantized with QuantizeData and then
+// passed to DetectionPostProcessRegularNmsEndToEnd on defaultBackends.
+BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessRegularNmsUint8Test)
+{
+ // These infos are only handed to QuantizeData, which reads their element count and quantization
+ // parameters; they are declared with DataType::Float32.
+ armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32);
+ armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::Float32);
+ armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32);
+
+ boxEncodingsInfo.SetQuantizationScale(1.0f);
+ boxEncodingsInfo.SetQuantizationOffset(1);
+ scoresInfo.SetQuantizationScale(0.01f);
+ scoresInfo.SetQuantizationOffset(0);
+ anchorsInfo.SetQuantizationScale(0.5f);
+ anchorsInfo.SetQuantizationOffset(0);
+
+ std::vector<float> boxEncodings({
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f
+ });
+ std::vector<float> scores({
+ 0.0f, 0.9f, 0.8f,
+ 0.0f, 0.75f, 0.72f,
+ 0.0f, 0.6f, 0.5f,
+ 0.0f, 0.93f, 0.95f,
+ 0.0f, 0.5f, 0.4f,
+ 0.0f, 0.3f, 0.2f
+ });
+ std::vector<float> anchors({
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 100.5f, 1.0f, 1.0f
+ });
+
+ std::vector<uint8_t> qBoxEncodings(boxEncodings.size(), 0);
+ std::vector<uint8_t> qScores(scores.size(), 0);
+ std::vector<uint8_t> qAnchors(anchors.size(), 0);
+ QuantizeData(qBoxEncodings.data(), boxEncodings.data(), boxEncodingsInfo);
+ QuantizeData(qScores.data(), scores.data(), scoresInfo);
+ QuantizeData(qAnchors.data(), anchors.data(), anchorsInfo);
+ // The trailing literals repeat the scale/offset values set on the infos above; presumably they are
+ // (box scale, box offset, score scale, score offset, anchor scale, anchor offset) -- TODO confirm
+ // against the helper's signature.
+ DetectionPostProcessRegularNmsEndToEnd<armnn::DataType::QuantisedAsymm8>(defaultBackends, qBoxEncodings,
+ qScores, qAnchors,
+ 1.0f, 1, 0.01f, 0, 0.5f, 0);
+}
+
+// Float32 end-to-end test: feeds fixed box encodings, scores and anchors to
+// DetectionPostProcessFastNmsEndToEnd on defaultBackends.
+BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessFastNmsTest)
+{
+ // Six rows of four values each.
+ std::vector<float> boxEncodings({
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f
+ });
+ // Six rows of three values each.
+ std::vector<float> scores({
+ 0.0f, 0.9f, 0.8f,
+ 0.0f, 0.75f, 0.72f,
+ 0.0f, 0.6f, 0.5f,
+ 0.0f, 0.93f, 0.95f,
+ 0.0f, 0.5f, 0.4f,
+ 0.0f, 0.3f, 0.2f
+ });
+ // Six rows of four values each.
+ std::vector<float> anchors({
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 100.5f, 1.0f, 1.0f
+ });
+ DetectionPostProcessFastNmsEndToEnd<armnn::DataType::Float32>(defaultBackends, boxEncodings, scores, anchors);
+}
+
+// QuantisedAsymm8 end-to-end test: the float reference data is quantized with QuantizeData and then
+// passed to DetectionPostProcessFastNmsEndToEnd on defaultBackends.
+// Named with the "Neon" prefix like the other DetectionPostProcess tests in this file; the previous
+// "Ref" prefix was a leftover from the reference-backend test this case was copied from.
+BOOST_AUTO_TEST_CASE(NeonDetectionPostProcessFastNmsUint8Test)
+{
+    // These infos are only handed to QuantizeData, which reads their element count and quantization
+    // parameters; they are declared with DataType::Float32.
+    armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32);
+    armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::Float32);
+    armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32);
+
+    boxEncodingsInfo.SetQuantizationScale(1.0f);
+    boxEncodingsInfo.SetQuantizationOffset(1);
+    scoresInfo.SetQuantizationScale(0.01f);
+    scoresInfo.SetQuantizationOffset(0);
+    anchorsInfo.SetQuantizationScale(0.5f);
+    anchorsInfo.SetQuantizationOffset(0);
+
+    std::vector<float> boxEncodings({
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 1.0f, 0.0f, 0.0f,
+        0.0f, -1.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 1.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f
+    });
+    std::vector<float> scores({
+        0.0f, 0.9f, 0.8f,
+        0.0f, 0.75f, 0.72f,
+        0.0f, 0.6f, 0.5f,
+        0.0f, 0.93f, 0.95f,
+        0.0f, 0.5f, 0.4f,
+        0.0f, 0.3f, 0.2f
+    });
+    std::vector<float> anchors({
+        0.5f, 0.5f, 1.0f, 1.0f,
+        0.5f, 0.5f, 1.0f, 1.0f,
+        0.5f, 0.5f, 1.0f, 1.0f,
+        0.5f, 10.5f, 1.0f, 1.0f,
+        0.5f, 10.5f, 1.0f, 1.0f,
+        0.5f, 100.5f, 1.0f, 1.0f
+    });
+
+    std::vector<uint8_t> qBoxEncodings(boxEncodings.size(), 0);
+    std::vector<uint8_t> qScores(scores.size(), 0);
+    std::vector<uint8_t> qAnchors(anchors.size(), 0);
+    QuantizeData(qBoxEncodings.data(), boxEncodings.data(), boxEncodingsInfo);
+    QuantizeData(qScores.data(), scores.data(), scoresInfo);
+    QuantizeData(qAnchors.data(), anchors.data(), anchorsInfo);
+    // The trailing literals repeat the scale/offset values set on the infos above.
+    DetectionPostProcessFastNmsEndToEnd<armnn::DataType::QuantisedAsymm8>(defaultBackends, qBoxEncodings,
+                                                                          qScores, qAnchors,
+                                                                          1.0f, 1, 0.01f, 0, 0.5f, 0);
+}
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index cf3789e5ac..44db6d263a 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -30,6 +30,8 @@ list(APPEND armnnNeonBackendWorkloads_sources
NeonDepthwiseConvolutionWorkload.hpp
NeonDequantizeWorkload.cpp
NeonDequantizeWorkload.hpp
+ NeonDetectionPostProcessWorkload.cpp
+ NeonDetectionPostProcessWorkload.hpp
NeonFloorFloatWorkload.cpp
NeonFloorFloatWorkload.hpp
NeonFullyConnectedWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
new file mode 100644
index 0000000000..481e950953
--- /dev/null
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
@@ -0,0 +1,112 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonDetectionPostProcessWorkload.hpp"
+
+#include "NeonWorkloadUtils.hpp"
+
+#include <aclCommon/ArmComputeTensorHandle.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <boost/cast.hpp>
+
+namespace armnn
+{
+
+// Translates an ArmNN DetectionPostProcessDescriptor into an arm_compute::DetectionPostProcessLayerInfo
+// by passing the descriptor's fields to the ACL constructor.
+// Declared static because it is a helper private to this translation unit (it is not declared in the
+// header); internal linkage keeps the generic name MakeInfo from clashing with other armnn symbols.
+static arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& desc)
+{
+    return arm_compute::DetectionPostProcessLayerInfo(desc.m_MaxDetections,
+                                                      desc.m_MaxClassesPerDetection,
+                                                      desc.m_NmsScoreThreshold,
+                                                      desc.m_NmsIouThreshold,
+                                                      desc.m_NumClasses,
+                                                      { desc.m_ScaleX,
+                                                        desc.m_ScaleY,
+                                                        desc.m_ScaleW,
+                                                        desc.m_ScaleH },
+                                                      desc.m_UseRegularNms,
+                                                      desc.m_DetectionsPerClass);
+}
+
+// Asks Arm Compute Library whether CPPDetectionPostProcessLayer accepts this configuration.
+// Each ArmNN TensorInfo is converted to an arm_compute::TensorInfo, the descriptor is converted with
+// MakeInfo, and the Status returned by CPPDetectionPostProcessLayer::validate is passed back unchanged.
+arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
+ const DetectionPostProcessDescriptor &desc)
+{
+ arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(desc);
+
+ // The three inputs are built as const infos.
+ const arm_compute::TensorInfo aclBoxEncodings =
+ armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
+
+ const arm_compute::TensorInfo aclScores =
+ armcomputetensorutils::BuildArmComputeTensorInfo(scores);
+
+ const arm_compute::TensorInfo aclAnchors =
+ armcomputetensorutils::BuildArmComputeTensorInfo(anchors);
+
+ // The four outputs are built as non-const infos.
+ arm_compute::TensorInfo aclDetectionBoxes =
+ armcomputetensorutils::BuildArmComputeTensorInfo(detectionBoxes);
+
+ arm_compute::TensorInfo aclDetectionClasses =
+ armcomputetensorutils::BuildArmComputeTensorInfo(detectionClasses);
+
+ arm_compute::TensorInfo aclDetectionScores =
+ armcomputetensorutils::BuildArmComputeTensorInfo(detectionScores);
+
+ arm_compute::TensorInfo aclNumDetections =
+ armcomputetensorutils::BuildArmComputeTensorInfo(numDetections);
+
+ return arm_compute::CPPDetectionPostProcessLayer::validate(
+ &aclBoxEncodings,
+ &aclScores,
+ &aclAnchors,
+ &aclDetectionBoxes,
+ &aclDetectionClasses,
+ &aclDetectionScores,
+ &aclNumDetections,
+ info);
+}
+
+// Creates the ACL anchors tensor, resolves the workload's two inputs and four outputs to ACL tensors
+// and configures the CPPDetectionPostProcessLayer function with them.
+NeonDetectionPostProcessWorkload::NeonDetectionPostProcessWorkload(
+ const DetectionPostProcessQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : BaseWorkload<DetectionPostProcessQueueDescriptor>(descriptor, info)
+{
+ // The anchors live in a tensor owned by this workload, shaped from the descriptor's anchors info.
+ // NOTE(review): descriptor.m_Anchors is dereferenced without a null check.
+ m_Anchors = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_Anchors, descriptor.m_Anchors->GetTensorInfo());
+
+ arm_compute::DetectionPostProcessLayerInfo di = MakeInfo(m_Data.m_Parameters);
+
+ // Casts a generic tensor handle to IAclTensorHandle and returns its underlying ACL tensor.
+ auto AclTensorRef = [](ITensorHandle* tensor) -> arm_compute::ITensor&
+ {
+ return boost::polymorphic_downcast<IAclTensorHandle*>(tensor)->GetTensor();
+ };
+
+ arm_compute::ITensor& boxEncodings = AclTensorRef(m_Data.m_Inputs[0]);
+ arm_compute::ITensor& scores = AclTensorRef(m_Data.m_Inputs[1]);
+
+ arm_compute::ITensor& detectionBoxes = AclTensorRef(m_Data.m_Outputs[0]);
+ arm_compute::ITensor& detectionClasses = AclTensorRef(m_Data.m_Outputs[1]);
+ arm_compute::ITensor& detectionScores = AclTensorRef(m_Data.m_Outputs[2]);
+ arm_compute::ITensor& numDetections = AclTensorRef(m_Data.m_Outputs[3]);
+
+ m_Func.configure(&boxEncodings, &scores, m_Anchors.get(),
+ &detectionBoxes, &detectionClasses, &detectionScores, &numDetections,
+ di);
+
+ // Runs after configure(); presumably allocates m_Anchors and copies the constant anchor values
+ // from m_Data.m_Anchors into it -- TODO confirm against InitializeArmComputeTensorData.
+ InitializeArmComputeTensorData(*m_Anchors, m_Data.m_Anchors);
+}
+
+// Runs the configured ACL function inside a Neon profiling event.
+// Execute() is const, so m_Func is declared mutable in the class to allow the non-const run() call.
+void NeonDetectionPostProcessWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDetectionPostProcessWorkload_Execute");
+ m_Func.run();
+}
+
+} // namespace armnn
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
new file mode 100644
index 0000000000..f7f0ebffb9
--- /dev/null
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/CPP/functions/CPPDetectionPostProcessLayer.h>
+
+namespace armnn
+{
+
+// Returns the arm_compute::Status reported by CPPDetectionPostProcessLayer::validate for the given
+// input infos (boxEncodings, scores, anchors), output infos (detectionBoxes, detectionClasses,
+// detectionScores, numDetections) and descriptor.
+arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
+ const DetectionPostProcessDescriptor &desc);
+
+// CpuAcc workload for DetectionPostProcess, backed by arm_compute::CPPDetectionPostProcessLayer.
+class NeonDetectionPostProcessWorkload : public BaseWorkload<DetectionPostProcessQueueDescriptor>
+{
+public:
+ NeonDetectionPostProcessWorkload(
+ const DetectionPostProcessQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ // mutable so that the const Execute() can call the function's run().
+ mutable arm_compute::CPPDetectionPostProcessLayer m_Func;
+
+ // ACL tensor for the anchors, owned by this workload.
+ std::unique_ptr<arm_compute::Tensor> m_Anchors;
+
+};
+
+} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index 77d819e3ab..dc9bef31b2 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -16,6 +16,7 @@
#include "NeonDepthToSpaceWorkload.hpp"
#include "NeonDepthwiseConvolutionWorkload.hpp"
#include "NeonDequantizeWorkload.hpp"
+#include "NeonDetectionPostProcessWorkload.hpp"
#include "NeonFloorFloatWorkload.hpp"
#include "NeonFullyConnectedWorkload.hpp"
#include "NeonGreaterWorkload.hpp"
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 30ce5c69b5..299503ddc6 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -637,10 +637,15 @@ bool RefLayerSupport::IsDequantizeSupported(const TensorInfo& input,
return supported;
}
-bool RefLayerSupport::IsDetectionPostProcessSupported(const armnn::TensorInfo& input0,
- const armnn::TensorInfo& input1,
- const armnn::DetectionPostProcessDescriptor& descriptor,
- armnn::Optional<std::string&> reasonIfUnsupported) const
+bool RefLayerSupport::IsDetectionPostProcessSupported(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
+ const DetectionPostProcessDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
{
bool supported = true;
@@ -651,10 +656,10 @@ bool RefLayerSupport::IsDetectionPostProcessSupported(const armnn::TensorInfo& i
DataType::QuantisedSymm16
};
- supported &= CheckSupportRule(TypeAnyOf(input0, supportedInputTypes), reasonIfUnsupported,
+ supported &= CheckSupportRule(TypeAnyOf(boxEncodings, supportedInputTypes), reasonIfUnsupported,
"Reference DetectionPostProcess: input 0 is not a supported type.");
- supported &= CheckSupportRule(TypeAnyOf(input1, supportedInputTypes), reasonIfUnsupported,
+ supported &= CheckSupportRule(TypeAnyOf(scores, supportedInputTypes), reasonIfUnsupported,
"Reference DetectionPostProcess: input 1 is not a supported type.");
return supported;
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 04b355ee0a..a7d6303d86 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -94,8 +94,13 @@ public:
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
- bool IsDetectionPostProcessSupported(const TensorInfo& input0,
- const TensorInfo& input1,
+ // DetectionPostProcess support query: two input infos (boxEncodings, scores), the anchors info,
+ // four output infos (detectionBoxes, detectionClasses, detectionScores, numDetections) and the descriptor.
+ bool IsDetectionPostProcessSupported(const TensorInfo& boxEncodings,
+ const TensorInfo& scores,
+ const TensorInfo& anchors,
+ const TensorInfo& detectionBoxes,
+ const TensorInfo& detectionClasses,
+ const TensorInfo& detectionScores,
+ const TensorInfo& numDetections,
const DetectionPostProcessDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;