aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNarumol Prangnawarat <narumol.prangnawarat@arm.com>2019-01-31 15:31:54 +0000
committerNarumol Prangnawarat <narumol.prangnawarat@arm.com>2019-02-04 10:57:48 +0000
commitbc67cef3e3dc9e7fe9c4331495009eda48c89527 (patch)
tree6a15af84fbc5989d25213790554acbb46cda5165
parentc981df3bb24df1f98c233d885e73a2ea5c6d3449 (diff)
downloadarmnn-bc67cef3e3dc9e7fe9c4331495009eda48c89527.tar.gz
IVGCVSW-2557 Ref Workload Implementation for Detection PostProcess
* implementation of DetectionPostProcessQueueDescriptor validate * add Uint8ToFloat32Workload * add implementation of Detection PostProcess functionalities * add ref workload implementation for float and uint8 * add layer support for Detection PostProcess in ref * unit tests Change-Id: I650461f49edbb3c533d68ef8700377af51bc3592
-rw-r--r--include/armnn/Descriptors.hpp2
-rw-r--r--src/backends/backendsCommon/Workload.hpp5
-rw-r--r--src/backends/backendsCommon/WorkloadData.cpp53
-rw-r--r--src/backends/backendsCommon/WorkloadData.hpp7
-rw-r--r--src/backends/reference/RefLayerSupport.cpp12
-rw-r--r--src/backends/reference/RefLayerSupport.hpp5
-rw-r--r--src/backends/reference/RefWorkloadFactory.cpp2
-rw-r--r--src/backends/reference/backend.mk3
-rw-r--r--src/backends/reference/test/CMakeLists.txt1
-rw-r--r--src/backends/reference/test/RefDetectionPostProcessTests.cpp172
-rw-r--r--src/backends/reference/workloads/CMakeLists.txt6
-rw-r--r--src/backends/reference/workloads/DetectionPostProcess.cpp264
-rw-r--r--src/backends/reference/workloads/DetectionPostProcess.hpp29
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp48
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp25
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp52
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp25
-rw-r--r--src/backends/reference/workloads/RefWorkloads.hpp2
18 files changed, 711 insertions, 2 deletions
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 29d294e69f..b14ed0b1ab 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -354,7 +354,7 @@ struct DetectionPostProcessDescriptor
/// Intersection over union threshold.
float m_NmsIouThreshold;
/// Number of classes.
- int32_t m_NumClasses;
+ uint32_t m_NumClasses;
/// Use Regular NMS.
bool m_UseRegularNms;
/// Center size encoding scale x.
diff --git a/src/backends/backendsCommon/Workload.hpp b/src/backends/backendsCommon/Workload.hpp
index 4d14adbf54..7fb26f8b56 100644
--- a/src/backends/backendsCommon/Workload.hpp
+++ b/src/backends/backendsCommon/Workload.hpp
@@ -187,4 +187,9 @@ using Float32ToFloat16Workload = MultiTypedWorkload<QueueDescriptor,
armnn::DataType::Float32,
armnn::DataType::Float16>;
+template <typename QueueDescriptor>
+using Uint8ToFloat32Workload = MultiTypedWorkload<QueueDescriptor,
+ armnn::DataType::QuantisedAsymm8,
+ armnn::DataType::Float32>;
+
} //namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 9714b02a80..b31d626550 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1082,6 +1082,59 @@ void GatherQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
ValidateTensorNumDimensions(output, "GatherQueueDescriptor", outputDim, "output");
}
+void DetectionPostProcessQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+ ValidateTwoInputs(workloadInfo, "DetectionPostProcessQueueDescriptor");
+
+ if (workloadInfo.m_OutputTensorInfos.size() != 4)
+ {
+ throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Requires exactly four outputs. " +
+ to_string(workloadInfo.m_OutputTensorInfos.size()) + " has been provided.");
+ }
+
+ if (m_Anchors == nullptr)
+ {
+ throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Anchors tensor descriptor is missing.");
+ }
+
+ const TensorInfo& boxEncodingsInfo = workloadInfo.m_InputTensorInfos[0];
+ const TensorInfo& scoresInfo = workloadInfo.m_InputTensorInfos[1];
+ const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo();
+ const TensorInfo& detectionBoxesInfo = workloadInfo.m_OutputTensorInfos[0];
+ const TensorInfo& detectionScoresInfo = workloadInfo.m_OutputTensorInfos[1];
+ const TensorInfo& detectionClassesInfo = workloadInfo.m_OutputTensorInfos[2];
+ const TensorInfo& numDetectionsInfo = workloadInfo.m_OutputTensorInfos[3];
+
+ ValidateTensorNumDimensions(boxEncodingsInfo, "DetectionPostProcessQueueDescriptor", 3, "box encodings");
+ ValidateTensorNumDimensions(scoresInfo, "DetectionPostProcessQueueDescriptor", 3, "scores");
+ ValidateTensorNumDimensions(anchorsInfo, "DetectionPostProcessQueueDescriptor", 2, "anchors");
+
+ ValidateTensorNumDimensions(detectionBoxesInfo, "DetectionPostProcessQueueDescriptor", 3, "detection boxes");
+ ValidateTensorNumDimensions(detectionScoresInfo, "DetectionPostProcessQueueDescriptor", 2, "detection scores");
+ ValidateTensorNumDimensions(detectionClassesInfo, "DetectionPostProcessQueueDescriptor", 2, "detection classes");
+ ValidateTensorNumDimensions(numDetectionsInfo, "DetectionPostProcessQueueDescriptor", 1, "num detections");
+
+ ValidateTensorDataType(detectionBoxesInfo, DataType::Float32,
+ "DetectionPostProcessQueueDescriptor", "detection boxes");
+ ValidateTensorDataType(detectionScoresInfo, DataType::Float32,
+ "DetectionPostProcessQueueDescriptor", "detection scores");
+ ValidateTensorDataType(detectionClassesInfo, DataType::Float32,
+ "DetectionPostProcessQueueDescriptor", "detection classes");
+ ValidateTensorDataType(numDetectionsInfo, DataType::Float32,
+ "DetectionPostProcessQueueDescriptor", "num detections");
+
+ if (m_Parameters.m_NmsIouThreshold <= 0.0f || m_Parameters.m_NmsIouThreshold > 1.0f)
+ {
+ throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Intersection over union threshold "
+ "must be positive and less than or equal to 1.");
+ }
+ if (scoresInfo.GetShape()[2] != m_Parameters.m_NumClasses + 1)
+ {
+ throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Number of classes with background "
+ "should be equal to number of classes + 1.");
+ }
+}
+
void PreCompiledQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
{
// This is internally generated so it should not need validation.
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index e44eba71af..09f56479cd 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -171,6 +171,13 @@ struct DepthwiseConvolution2dQueueDescriptor : QueueDescriptorWithParameters<Dep
struct DetectionPostProcessQueueDescriptor : QueueDescriptorWithParameters<DetectionPostProcessDescriptor>
{
+ DetectionPostProcessQueueDescriptor()
+ : m_Anchors(nullptr)
+ {
+ }
+
+ const ConstCpuTensorHandle* m_Anchors;
+
void Validate(const WorkloadInfo& workloadInfo) const;
};
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 78e44bd6a3..4b32a8938d 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -203,6 +203,18 @@ bool RefLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
&TrueFunc<>);
}
+bool RefLayerSupport::IsDetectionPostProcessSupported(const armnn::TensorInfo& input0,
+ const armnn::TensorInfo& input1,
+ const armnn::DetectionPostProcessDescriptor& descriptor,
+ armnn::Optional<std::string&> reasonIfUnsupported) const
+{
+ ignore_unused(input1);
+ return IsSupportedForDataTypeRef(reasonIfUnsupported,
+ input0.GetDataType(),
+ &TrueFunc<>,
+ &TrueFunc<>);
+}
+
bool RefLayerSupport::IsDivisionSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 5fe111ba15..3b73f22927 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -66,6 +66,11 @@ public:
const Optional<TensorInfo>& biases,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsDetectionPostProcessSupported(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const DetectionPostProcessDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsDivisionSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 103abdd1ce..9c1ce1e013 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -154,7 +154,7 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d(
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDetectionPostProcess(
const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<RefDetectionPostProcessFloat32Workload, RefDetectionPostProcessUint8Workload>(descriptor, info);
}
std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateNormalization(
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 3ee07913dc..acaedc9688 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -16,6 +16,7 @@ BACKEND_SOURCES := \
workloads/Broadcast.cpp \
workloads/ConvImpl.cpp \
workloads/Debug.cpp \
+ workloads/DetectionPostProcess.cpp \
workloads/ElementwiseFunction.cpp \
workloads/FullyConnected.cpp \
workloads/Gather.cpp \
@@ -37,6 +38,8 @@ BACKEND_SOURCES := \
workloads/RefDebugWorkload.cpp \
workloads/RefDepthwiseConvolution2dFloat32Workload.cpp \
workloads/RefDepthwiseConvolution2dUint8Workload.cpp \
+ workloads/RefDetectionPostProcessFloat32Workload.cpp \
+ workloads/RefDetectionPostProcessUint8Workload.cpp \
workloads/RefElementwiseWorkload.cpp \
workloads/RefFakeQuantizationFloat32Workload.cpp \
workloads/RefFloorFloat32Workload.cpp \
diff --git a/src/backends/reference/test/CMakeLists.txt b/src/backends/reference/test/CMakeLists.txt
index 8fa9b5cdaf..9e5711e608 100644
--- a/src/backends/reference/test/CMakeLists.txt
+++ b/src/backends/reference/test/CMakeLists.txt
@@ -5,6 +5,7 @@
list(APPEND armnnRefBackendUnitTests_sources
RefCreateWorkloadTests.cpp
+ RefDetectionPostProcessTests.cpp
RefEndToEndTests.cpp
RefJsonPrinterTests.cpp
RefLayerSupportTests.cpp
diff --git a/src/backends/reference/test/RefDetectionPostProcessTests.cpp b/src/backends/reference/test/RefDetectionPostProcessTests.cpp
new file mode 100644
index 0000000000..39403f0284
--- /dev/null
+++ b/src/backends/reference/test/RefDetectionPostProcessTests.cpp
@@ -0,0 +1,172 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "reference/workloads/DetectionPostProcess.cpp"
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Types.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(RefDetectionPostProcess)
+
+
+BOOST_AUTO_TEST_CASE(TopKSortTest)
+{
+ unsigned int k = 3;
+ unsigned int indices[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+ float values[8] = { 0, 7, 6, 5, 4, 3, 2, 500 };
+ TopKSort(k, indices, values, 8);
+ BOOST_TEST(indices[0] == 7);
+ BOOST_TEST(indices[1] == 1);
+ BOOST_TEST(indices[2] == 2);
+}
+
+BOOST_AUTO_TEST_CASE(FullTopKSortTest)
+{
+ unsigned int k = 8;
+ unsigned int indices[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+ float values[8] = { 0, 7, 6, 5, 4, 3, 2, 500 };
+ TopKSort(k, indices, values, 8);
+ BOOST_TEST(indices[0] == 7);
+ BOOST_TEST(indices[1] == 1);
+ BOOST_TEST(indices[2] == 2);
+ BOOST_TEST(indices[3] == 3);
+ BOOST_TEST(indices[4] == 4);
+ BOOST_TEST(indices[5] == 5);
+ BOOST_TEST(indices[6] == 6);
+ BOOST_TEST(indices[7] == 0);
+}
+
+BOOST_AUTO_TEST_CASE(IouTest)
+{
+ float boxI[4] = { 0.0f, 0.0f, 10.0f, 10.0f };
+ float boxJ[4] = { 1.0f, 1.0f, 11.0f, 11.0f };
+ float iou = IntersectionOverUnion(boxI, boxJ);
+ BOOST_TEST(iou == 0.68, boost::test_tools::tolerance(0.001));
+}
+
+BOOST_AUTO_TEST_CASE(NmsFunction)
+{
+ std::vector<float> boxCorners({
+ 0.0f, 0.0f, 1.0f, 1.0f,
+ 0.0f, 0.1f, 1.0f, 1.1f,
+ 0.0f, -0.1f, 1.0f, 0.9f,
+ 0.0f, 10.0f, 1.0f, 11.0f,
+ 0.0f, 10.1f, 1.0f, 11.1f,
+ 0.0f, 100.0f, 1.0f, 101.0f
+ });
+
+ std::vector<float> scores({ 0.9f, 0.75f, 0.6f, 0.93f, 0.5f, 0.3f });
+
+ std::vector<unsigned int> result = NonMaxSuppression(6, boxCorners, scores, 0.0, 3, 0.5);
+ BOOST_TEST(result.size() == 3);
+ BOOST_TEST(result[0] == 3);
+ BOOST_TEST(result[1] == 0);
+ BOOST_TEST(result[2] == 5);
+}
+
+void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>& expectedDetectionBoxes,
+ const std::vector<float>& expectedDetectionClasses,
+ const std::vector<float>& expectedDetectionScores,
+ const std::vector<float>& expectedNumDetections)
+{
+ armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32);
+ armnn::TensorInfo scoresInfo({ 1, 6, 4 }, armnn::DataType::Float32);
+ armnn::TensorInfo anchorsInfo({ 1, 6, 4 }, armnn::DataType::Float32);
+
+ armnn::TensorInfo detectionBoxesInfo({ 1, 3, 4 }, armnn::DataType::Float32);
+ armnn::TensorInfo detectionScoresInfo({ 1, 3 }, armnn::DataType::Float32);
+ armnn::TensorInfo detectionClassesInfo({ 1, 3 }, armnn::DataType::Float32);
+ armnn::TensorInfo numDetectionInfo({ 1 }, armnn::DataType::Float32);
+
+ armnn::DetectionPostProcessDescriptor desc;
+ desc.m_UseRegularNms = useRegularNms;
+ desc.m_MaxDetections = 3;
+ desc.m_MaxClassesPerDetection = 1;
+ desc.m_DetectionsPerClass =1;
+ desc.m_NmsScoreThreshold = 0.0;
+ desc.m_NmsIouThreshold = 0.5;
+ desc.m_NumClasses = 2;
+ desc.m_ScaleY = 10.0;
+ desc.m_ScaleX = 10.0;
+ desc.m_ScaleH = 5.0;
+ desc.m_ScaleW = 5.0;
+
+ std::vector<float> boxEncodings({
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f
+ });
+ std::vector<float> scores({
+ 0.0f, 0.9f, 0.8f,
+ 0.0f, 0.75f, 0.72f,
+ 0.0f, 0.6f, 0.5f,
+ 0.0f, 0.93f, 0.95f,
+ 0.0f, 0.5f, 0.4f,
+ 0.0f, 0.3f, 0.2f
+ });
+ std::vector<float> anchors({
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 100.5f, 1.0f, 1.0f
+ });
+
+ std::vector<float> detectionBoxes(detectionBoxesInfo.GetNumElements());
+ std::vector<float> detectionScores(detectionScoresInfo.GetNumElements());
+ std::vector<float> detectionClasses(detectionClassesInfo.GetNumElements());
+ std::vector<float> numDetections(1);
+
+ armnn::DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
+ detectionBoxesInfo, detectionClassesInfo,
+ detectionScoresInfo, numDetectionInfo, desc,
+ boxEncodings.data(), scores.data(), anchors.data(),
+ detectionBoxes.data(), detectionClasses.data(),
+ detectionScores.data(), numDetections.data());
+
+ BOOST_TEST(detectionBoxes == expectedDetectionBoxes);
+ BOOST_TEST(detectionScores == expectedDetectionScores);
+ BOOST_TEST(detectionClasses == expectedDetectionClasses);
+ BOOST_TEST(numDetections == expectedNumDetections);
+}
+
+BOOST_AUTO_TEST_CASE(RegularNmsDetectionPostProcess)
+{
+ std::vector<float> expectedDetectionBoxes({
+ 0.0f, 10.0f, 1.0f, 11.0f,
+ 0.0f, 10.0f, 1.0f, 11.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f
+ });
+
+ std::vector<float> expectedDetectionScores({ 0.95f, 0.93f, 0.0f });
+ std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f });
+ std::vector<float> expectedNumDetections({ 2.0f });
+
+ DetectionPostProcessTestImpl(true, expectedDetectionBoxes, expectedDetectionClasses,
+ expectedDetectionScores, expectedNumDetections);
+}
+
+BOOST_AUTO_TEST_CASE(FastNmsDetectionPostProcess)
+{
+ std::vector<float> expectedDetectionBoxes({
+ 0.0f, 10.0f, 1.0f, 11.0f,
+ 0.0f, 0.0f, 1.0f, 1.0f,
+ 0.0f, 100.0f, 1.0f, 101.0f
+ });
+ std::vector<float> expectedDetectionScores({ 0.95f, 0.9f, 0.3f });
+ std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f });
+ std::vector<float> expectedNumDetections({ 3.0f });
+
+ DetectionPostProcessTestImpl(false, expectedDetectionBoxes, expectedDetectionClasses,
+ expectedDetectionScores, expectedNumDetections);
+}
+
+BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 57e89fa456..47e42f7fcc 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -15,6 +15,8 @@ list(APPEND armnnRefBackendWorkloads_sources
ConvImpl.hpp
Debug.cpp
Debug.hpp
+ DetectionPostProcess.cpp
+ DetectionPostProcess.hpp
ElementwiseFunction.cpp
ElementwiseFunction.hpp
FullyConnected.cpp
@@ -60,6 +62,10 @@ list(APPEND armnnRefBackendWorkloads_sources
RefDepthwiseConvolution2dFloat32Workload.hpp
RefDepthwiseConvolution2dUint8Workload.cpp
RefDepthwiseConvolution2dUint8Workload.hpp
+ RefDetectionPostProcessUint8Workload.cpp
+ RefDetectionPostProcessUint8Workload.hpp
+ RefDetectionPostProcessFloat32Workload.cpp
+ RefDetectionPostProcessFloat32Workload.hpp
RefFakeQuantizationFloat32Workload.cpp
RefFakeQuantizationFloat32Workload.hpp
RefFloorFloat32Workload.cpp
diff --git a/src/backends/reference/workloads/DetectionPostProcess.cpp b/src/backends/reference/workloads/DetectionPostProcess.cpp
new file mode 100644
index 0000000000..958de8294b
--- /dev/null
+++ b/src/backends/reference/workloads/DetectionPostProcess.cpp
@@ -0,0 +1,264 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DetectionPostProcess.hpp"
+
+#include <armnn/ArmNN.hpp>
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <algorithm>
+#include <numeric>
+
+namespace
+{
+
+std::vector<unsigned int> GenerateRangeK(unsigned int k)
+{
+ std::vector<unsigned int> range(k);
+ std::iota(range.begin(), range.end(), 0);
+ return range;
+}
+
+void TopKSort(unsigned int k, unsigned int* indices, const float* values, unsigned int numElement)
+{
+ std::partial_sort(indices, indices + k, indices + numElement,
+ [&values](unsigned int i, unsigned int j) { return values[i] > values[j]; });
+}
+
+float IntersectionOverUnion(const float* boxI, const float* boxJ)
+{
+ // Box-corner format: ymin, xmin, ymax, xmax.
+ const int yMin = 0;
+ const int xMin = 1;
+ const int yMax = 2;
+ const int xMax = 3;
+ float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]);
+ float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]);
+ float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]);
+ float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]);
+ float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]);
+ float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]);
+ float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
+ std::max(xMaxIntersection - xMinIntersection, 0.0f);
+ float areaUnion = areaI + areaJ - areaIntersection;
+ return areaIntersection / areaUnion;
+}
+
+std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, const std::vector<float>& boxCorners,
+ const std::vector<float>& scores, float nmsScoreThreshold,
+ unsigned int maxDetection, float nmsIouThreshold)
+{
+ // Select boxes that have scores above a given threshold.
+ std::vector<float> scoresAboveThreshold;
+ std::vector<unsigned int> indicesAboveThreshold;
+ for (unsigned int i = 0; i < numBoxes; ++i)
+ {
+ if (scores[i] >= nmsScoreThreshold)
+ {
+ scoresAboveThreshold.push_back(scores[i]);
+ indicesAboveThreshold.push_back(i);
+ }
+ }
+
+ // Sort the indices based on scores.
+ unsigned int numAboveThreshold = boost::numeric_cast<unsigned int>(scoresAboveThreshold.size());
+ std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
+ TopKSort(numAboveThreshold,sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
+
+ // Number of output cannot be more than max detections specified in the option.
+ unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
+ std::vector<unsigned int> outputIndices;
+ std::vector<bool> visited(numAboveThreshold, false);
+
+ // Prune out the boxes with high intersection over union by keeping the box with higher score.
+ for (unsigned int i = 0; i < numAboveThreshold; ++i)
+ {
+ if (outputIndices.size() >= numOutput)
+ {
+ break;
+ }
+ if (!visited[sortedIndices[i]])
+ {
+ outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]);
+ }
+ for (unsigned int j = i + 1; j < numAboveThreshold; ++j)
+ {
+ unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4;
+ unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4;
+ if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold)
+ {
+ visited[sortedIndices[j]] = true;
+ }
+ }
+ }
+ return outputIndices;
+}
+
+void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector<float>& boxCorners,
+ const std::vector<unsigned int>& outputIndices, const std::vector<unsigned int>& selectedBoxes,
+ const std::vector<unsigned int>& selectedClasses, const std::vector<float>& selectedScores,
+ float* detectionBoxes, float* detectionScores, float* detectionClasses, float* numDetections)
+{
+ for (unsigned int i = 0; i < numOutput; ++i)
+ {
+ unsigned int boxIndex = i * 4;
+ unsigned int boxConorIndex = selectedBoxes[outputIndices[i]] * 4;
+ if (i < numSelected)
+ {
+ detectionScores[i] = selectedScores[outputIndices[i]];
+ detectionClasses[i] = boost::numeric_cast<float>(selectedClasses[outputIndices[i]]);
+ detectionBoxes[boxIndex] = boxCorners[boxConorIndex];
+ detectionBoxes[boxIndex + 1] = boxCorners[boxConorIndex + 1];
+ detectionBoxes[boxIndex + 2] = boxCorners[boxConorIndex + 2];
+ detectionBoxes[boxIndex + 3] = boxCorners[boxConorIndex + 3];
+ }
+ else
+ {
+ detectionScores[i] = 0.0f;
+ detectionClasses[i] = 0.0f;
+ detectionBoxes[boxIndex] = 0.0f;
+ detectionBoxes[boxIndex + 1] = 0.0f;
+ detectionBoxes[boxIndex + 2] = 0.0f;
+ detectionBoxes[boxIndex + 3] = 0.0f;
+ }
+ }
+ numDetections[0] = boost::numeric_cast<float>(numOutput);
+}
+
+} // anonymous namespace
+
+namespace armnn
+{
+
+void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
+ const TensorInfo& scoresInfo,
+ const TensorInfo& anchorsInfo,
+ const TensorInfo& detectionBoxesInfo,
+ const TensorInfo& detectionClassesInfo,
+ const TensorInfo& detectionScoresInfo,
+ const TensorInfo& numDetectionsInfo,
+ const DetectionPostProcessDescriptor& desc,
+ const float* boxEncodings,
+ const float* scores,
+ const float* anchors,
+ float* detectionBoxes,
+ float* detectionClasses,
+ float* detectionScores,
+ float* numDetections)
+{
+ // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
+ // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
+ std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
+ unsigned int numBoxes = boxEncodingsInfo.GetShape()[1];
+ for (unsigned int i = 0; i < numBoxes; ++i)
+ {
+ unsigned int indexY = i * 4;
+ unsigned int indexX = indexY + 1;
+ unsigned int indexH = indexX + 1;
+ unsigned int indexW = indexH + 1;
+ float yCentre = boxEncodings[indexY] / desc.m_ScaleY * anchors[indexH] + anchors[indexY];
+ float xCentre = boxEncodings[indexX] / desc.m_ScaleX * anchors[indexW] + anchors[indexX];
+ float halfH = 0.5f * expf(boxEncodings[indexH] / desc.m_ScaleH) * anchors[indexH];
+ float halfW = 0.5f * expf(boxEncodings[indexW] / desc.m_ScaleW) * anchors[indexW];
+ // ymin
+ boxCorners[indexY] = yCentre - halfH;
+ // xmin
+ boxCorners[indexX] = xCentre - halfW;
+ // ymax
+ boxCorners[indexH] = yCentre + halfH;
+ // xmax
+ boxCorners[indexW] = xCentre + halfW;
+
+ BOOST_ASSERT(boxCorners[indexY] < boxCorners[indexH]);
+ BOOST_ASSERT(boxCorners[indexX] < boxCorners[indexW]);
+ }
+
+ unsigned int numClassesWithBg = desc.m_NumClasses + 1;
+
+ // Perform Non Max Suppression.
+ if (desc.m_UseRegularNms)
+ {
+ // Perform Regular NMS.
+ // For each class, perform NMS and select max detection numbers of the highest score across all classes.
+ std::vector<float> classScores(numBoxes);
+ std::vector<unsigned int>selectedBoxesAfterNms;
+ std::vector<float> selectedScoresAfterNms;
+ std::vector<unsigned int> selectedClasses;
+
+ for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
+ {
+ // For each boxes, get scores of the boxes for the class c.
+ for (unsigned int i = 0; i < numBoxes; ++i)
+ {
+ classScores[i] = scores[i * numClassesWithBg + c + 1];
+ }
+ std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, classScores,
+ desc.m_NmsScoreThreshold,
+ desc.m_MaxClassesPerDetection,
+ desc.m_NmsIouThreshold);
+
+ for (unsigned int i = 0; i < selectedIndices.size(); ++i)
+ {
+ selectedBoxesAfterNms.push_back(selectedIndices[i]);
+ selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]);
+ selectedClasses.push_back(c);
+ }
+ }
+
+ // Select max detection numbers of the highest score across all classes
+ unsigned int numSelected = boost::numeric_cast<unsigned int>(selectedBoxesAfterNms.size());
+ unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
+
+ // Sort the max scores among the selected indices.
+ std::vector<unsigned int> outputIndices = GenerateRangeK(numSelected);
+ TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected);
+
+ AllocateOutputData(numOutput, numSelected, boxCorners, outputIndices,
+ selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms,
+ detectionBoxes, detectionScores, detectionClasses, numDetections);
+ }
+ else
+ {
+ // Perform Fast NMS.
+ // Select max scores of boxes and perform NMS on max scores,
+ // select max detection numbers of the highest score
+ unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses);
+ std::vector<float> maxScores;
+ std::vector<unsigned int>boxIndices;
+ std::vector<unsigned int>maxScoreClasses;
+
+ for (unsigned int box = 0; box < numBoxes; ++box)
+ {
+ unsigned int scoreIndex = box * numClassesWithBg + 1;
+
+ // Get the max scores of the box.
+ std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
+ TopKSort(numClassesPerBox, maxScoreIndices.data(), scores + scoreIndex, desc.m_NumClasses);
+
+ for (unsigned int i = 0; i < numClassesPerBox; ++i)
+ {
+ maxScores.push_back(scores[scoreIndex + maxScoreIndices[i]]);
+ maxScoreClasses.push_back(maxScoreIndices[i]);
+ boxIndices.push_back(box);
+ }
+ }
+
+ // Perform NMS on max scores
+ std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores,
+ desc.m_NmsScoreThreshold,
+ desc.m_MaxDetections,
+ desc.m_NmsIouThreshold);
+
+ unsigned int numSelected = boost::numeric_cast<unsigned int>(selectedIndices.size());
+ unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
+
+ AllocateOutputData(numOutput, numSelected, boxCorners, selectedIndices,
+ boxIndices, maxScoreClasses, maxScores,
+ detectionBoxes, detectionScores, detectionClasses, numDetections);
+ }
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/DetectionPostProcess.hpp b/src/backends/reference/workloads/DetectionPostProcess.hpp
new file mode 100644
index 0000000000..06e9e15781
--- /dev/null
+++ b/src/backends/reference/workloads/DetectionPostProcess.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "armnn/Tensor.hpp"
+#include "armnn/Descriptors.hpp"
+
+namespace armnn
+{
+
+void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
+ const TensorInfo& scoresInfo,
+ const TensorInfo& anchorsInfo,
+ const TensorInfo& detectionBoxesInfo,
+ const TensorInfo& detectionClassesInfo,
+ const TensorInfo& detectionScoresInfo,
+ const TensorInfo& numDetectionsInfo,
+ const DetectionPostProcessDescriptor& desc,
+ const float* boxEncodings,
+ const float* scores,
+ const float* anchors,
+ float* detectionBoxes,
+ float* detectionClasses,
+ float* detectionScores,
+ float* numDetections);
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp
new file mode 100644
index 0000000000..ddab046f9c
--- /dev/null
+++ b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefDetectionPostProcessFloat32Workload.hpp"
+
+#include "DetectionPostProcess.hpp"
+#include "Profiling.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+RefDetectionPostProcessFloat32Workload::RefDetectionPostProcessFloat32Workload(
+ const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<DetectionPostProcessQueueDescriptor>(descriptor, info),
+ m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {}
+
+void RefDetectionPostProcessFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute");
+
+ const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get());
+ const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]);
+ const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]);
+ const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]);
+
+ const float* boxEncodings = GetInputTensorDataFloat(0, m_Data);
+ const float* scores = GetInputTensorDataFloat(1, m_Data);
+ const float* anchors = m_Anchors->GetConstTensor<float>();
+
+ float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
+ float* detectionClasses = GetOutputTensorData<float>(1, m_Data);
+ float* detectionScores = GetOutputTensorData<float>(2, m_Data);
+ float* numDetections = GetOutputTensorData<float>(3, m_Data);
+
+ DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
+ detectionBoxesInfo, detectionClassesInfo,
+ detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters,
+ boxEncodings, scores, anchors, detectionBoxes,
+ detectionClasses, detectionScores, numDetections);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp
new file mode 100644
index 0000000000..9f2a697ada
--- /dev/null
+++ b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefDetectionPostProcessFloat32Workload : public Float32Workload<DetectionPostProcessQueueDescriptor>
+{
+public:
+ explicit RefDetectionPostProcessFloat32Workload(const DetectionPostProcessQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Anchors;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp
new file mode 100644
index 0000000000..ccdaf87c9a
--- /dev/null
+++ b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefDetectionPostProcessUint8Workload.hpp"
+
+#include "DetectionPostProcess.hpp"
+#include "Profiling.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+RefDetectionPostProcessUint8Workload::RefDetectionPostProcessUint8Workload(
+ const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8ToFloat32Workload<DetectionPostProcessQueueDescriptor>(descriptor, info),
+ m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {}
+
+void RefDetectionPostProcessUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute");
+
+ const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get());
+ const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]);
+ const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]);
+ const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]);
+
+ const uint8_t* boxEncodingsData = GetInputTensorDataU8(0, m_Data);
+ const uint8_t* scoresData = GetInputTensorDataU8(1, m_Data);
+ const uint8_t* anchorsData = m_Anchors->GetConstTensor<uint8_t>();
+
+ auto boxEncodings = Dequantize(boxEncodingsData, boxEncodingsInfo);
+ auto scores = Dequantize(scoresData, scoresInfo);
+ auto anchors = Dequantize(anchorsData, anchorsInfo);
+
+ float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
+ float* detectionClasses = GetOutputTensorData<float>(1, m_Data);
+ float* detectionScores = GetOutputTensorData<float>(2, m_Data);
+ float* numDetections = GetOutputTensorData<float>(3, m_Data);
+
+ DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
+ detectionBoxesInfo, detectionClassesInfo,
+ detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters,
+ boxEncodings.data(), scores.data(), anchors.data(),
+ detectionBoxes, detectionClasses, detectionScores, numDetections);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp
new file mode 100644
index 0000000000..91590f57bd
--- /dev/null
+++ b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefDetectionPostProcessUint8Workload : public Uint8ToFloat32Workload<DetectionPostProcessQueueDescriptor>
+{
+public:
+ explicit RefDetectionPostProcessUint8Workload(const DetectionPostProcessQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<ScopedCpuTensorHandle> m_Anchors;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index d9f4dbb342..2156388ba2 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -38,6 +38,8 @@
#include "ResizeBilinear.hpp"
#include "RefNormalizationFloat32Workload.hpp"
#include "RefDepthwiseConvolution2dFloat32Workload.hpp"
+#include "RefDetectionPostProcessFloat32Workload.hpp"
+#include "RefDetectionPostProcessUint8Workload.hpp"
#include "RefPooling2dUint8Workload.hpp"
#include "BatchNormImpl.hpp"
#include "Activation.hpp"