aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAron Virginas-Tar <Aron.Virginas-Tar@arm.com>2019-06-03 17:10:02 +0100
committerÁron Virginás-Tar <aron.virginas-tar@arm.com>2019-06-05 15:06:39 +0000
commit6331f91a4a1cb1ad16c569d98bb9ddf704788464 (patch)
tree338cce081966bfb42f635b6febd68642d492b9f8
parent18f2d1ccf9e743e61ed3733ae5a38f796a759db8 (diff)
downloadarmnn-6331f91a4a1cb1ad16c569d98bb9ddf704788464.tar.gz
IVGCVSW-2971 Support QSymm16 for DetectionPostProcess workloads
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> Change-Id: I8af45afe851a9ccbf8bce54727147fcd52ac9a1f
-rw-r--r--src/backends/backendsCommon/WorkloadData.cpp66
-rw-r--r--src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp471
-rw-r--r--src/backends/reference/RefWorkloadFactory.cpp11
-rw-r--r--src/backends/reference/backend.mk3
-rw-r--r--src/backends/reference/test/RefDetectionPostProcessTests.cpp68
-rw-r--r--src/backends/reference/test/RefLayerTests.cpp16
-rw-r--r--src/backends/reference/workloads/CMakeLists.txt6
-rw-r--r--src/backends/reference/workloads/DetectionPostProcess.cpp111
-rw-r--r--src/backends/reference/workloads/DetectionPostProcess.hpp24
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp48
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp52
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp25
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp50
-rw-r--r--src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp (renamed from src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp)6
-rw-r--r--src/backends/reference/workloads/RefWorkloads.hpp3
15 files changed, 502 insertions, 458 deletions
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index a373f55d3e..d0aaf1db38 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1459,53 +1459,63 @@ void GatherQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
void DetectionPostProcessQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
{
- ValidateNumInputs(workloadInfo, "DetectionPostProcessQueueDescriptor", 2);
+    const std::string descriptorName = "DetectionPostProcessQueueDescriptor";
+ ValidateNumInputs(workloadInfo, descriptorName, 2);
if (workloadInfo.m_OutputTensorInfos.size() != 4)
{
- throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Requires exactly four outputs. " +
+ throw InvalidArgumentException(descriptorName + ": Requires exactly four outputs. " +
to_string(workloadInfo.m_OutputTensorInfos.size()) + " has been provided.");
}
if (m_Anchors == nullptr)
{
- throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Anchors tensor descriptor is missing.");
+ throw InvalidArgumentException(descriptorName + ": Anchors tensor descriptor is missing.");
}
const TensorInfo& boxEncodingsInfo = workloadInfo.m_InputTensorInfos[0];
- const TensorInfo& scoresInfo = workloadInfo.m_InputTensorInfos[1];
- const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo();
- const TensorInfo& detectionBoxesInfo = workloadInfo.m_OutputTensorInfos[0];
+ const TensorInfo& scoresInfo = workloadInfo.m_InputTensorInfos[1];
+ const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo();
+
+ const TensorInfo& detectionBoxesInfo = workloadInfo.m_OutputTensorInfos[0];
const TensorInfo& detectionClassesInfo = workloadInfo.m_OutputTensorInfos[1];
- const TensorInfo& detectionScoresInfo = workloadInfo.m_OutputTensorInfos[2];
- const TensorInfo& numDetectionsInfo = workloadInfo.m_OutputTensorInfos[3];
-
- ValidateTensorNumDimensions(boxEncodingsInfo, "DetectionPostProcessQueueDescriptor", 3, "box encodings");
- ValidateTensorNumDimensions(scoresInfo, "DetectionPostProcessQueueDescriptor", 3, "scores");
- ValidateTensorNumDimensions(anchorsInfo, "DetectionPostProcessQueueDescriptor", 2, "anchors");
-
- ValidateTensorNumDimensions(detectionBoxesInfo, "DetectionPostProcessQueueDescriptor", 3, "detection boxes");
- ValidateTensorNumDimensions(detectionScoresInfo, "DetectionPostProcessQueueDescriptor", 2, "detection scores");
- ValidateTensorNumDimensions(detectionClassesInfo, "DetectionPostProcessQueueDescriptor", 2, "detection classes");
- ValidateTensorNumDimensions(numDetectionsInfo, "DetectionPostProcessQueueDescriptor", 1, "num detections");
-
- ValidateTensorDataType(detectionBoxesInfo, DataType::Float32,
- "DetectionPostProcessQueueDescriptor", "detection boxes");
- ValidateTensorDataType(detectionScoresInfo, DataType::Float32,
- "DetectionPostProcessQueueDescriptor", "detection scores");
- ValidateTensorDataType(detectionClassesInfo, DataType::Float32,
- "DetectionPostProcessQueueDescriptor", "detection classes");
- ValidateTensorDataType(numDetectionsInfo, DataType::Float32,
- "DetectionPostProcessQueueDescriptor", "num detections");
+ const TensorInfo& detectionScoresInfo = workloadInfo.m_OutputTensorInfos[2];
+ const TensorInfo& numDetectionsInfo = workloadInfo.m_OutputTensorInfos[3];
+
+ ValidateTensorNumDimensions(boxEncodingsInfo, descriptorName, 3, "box encodings");
+ ValidateTensorNumDimensions(scoresInfo, descriptorName, 3, "scores");
+ ValidateTensorNumDimensions(anchorsInfo, descriptorName, 2, "anchors");
+
+ const std::vector<DataType> supportedInputTypes =
+ {
+ DataType::Float32,
+ DataType::QuantisedAsymm8,
+ DataType::QuantisedSymm16
+ };
+
+ ValidateDataTypes(boxEncodingsInfo, supportedInputTypes, descriptorName);
+ ValidateDataTypes(scoresInfo, supportedInputTypes, descriptorName);
+ ValidateDataTypes(anchorsInfo, supportedInputTypes, descriptorName);
+
+ ValidateTensorNumDimensions(detectionBoxesInfo, descriptorName, 3, "detection boxes");
+ ValidateTensorNumDimensions(detectionScoresInfo, descriptorName, 2, "detection scores");
+ ValidateTensorNumDimensions(detectionClassesInfo, descriptorName, 2, "detection classes");
+ ValidateTensorNumDimensions(numDetectionsInfo, descriptorName, 1, "num detections");
+
+ // NOTE: Output is always Float32 regardless of input type
+ ValidateTensorDataType(detectionBoxesInfo, DataType::Float32, descriptorName, "detection boxes");
+ ValidateTensorDataType(detectionScoresInfo, DataType::Float32, descriptorName, "detection scores");
+ ValidateTensorDataType(detectionClassesInfo, DataType::Float32, descriptorName, "detection classes");
+ ValidateTensorDataType(numDetectionsInfo, DataType::Float32, descriptorName, "num detections");
if (m_Parameters.m_NmsIouThreshold <= 0.0f || m_Parameters.m_NmsIouThreshold > 1.0f)
{
- throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Intersection over union threshold "
+ throw InvalidArgumentException(descriptorName + ": Intersection over union threshold "
"must be positive and less than or equal to 1.");
}
if (scoresInfo.GetShape()[2] != m_Parameters.m_NumClasses + 1)
{
- throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Number of classes with background "
+ throw InvalidArgumentException(descriptorName + ": Number of classes with background "
"should be equal to number of classes + 1.");
}
}
diff --git a/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp b/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp
index 092ce26696..2726fdef4c 100644
--- a/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp
+++ b/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp
@@ -15,7 +15,124 @@
#include <backendsCommon/test/WorkloadFactoryHelper.hpp>
#include <test/TensorHelpers.hpp>
-template <typename FactoryType, armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+namespace
+{
+
+using FloatData = std::vector<float>;
+using QuantData = std::pair<float, int32_t>;
+
+struct TestData
+{
+ static const armnn::TensorShape s_BoxEncodingsShape;
+ static const armnn::TensorShape s_ScoresShape;
+ static const armnn::TensorShape s_AnchorsShape;
+
+ static const QuantData s_BoxEncodingsQuantData;
+ static const QuantData s_ScoresQuantData;
+ static const QuantData s_AnchorsQuantData;
+
+ static const FloatData s_BoxEncodings;
+ static const FloatData s_Scores;
+ static const FloatData s_Anchors;
+};
+
+struct RegularNmsExpectedResults
+{
+ static const FloatData s_DetectionBoxes;
+ static const FloatData s_DetectionScores;
+ static const FloatData s_DetectionClasses;
+ static const FloatData s_NumDetections;
+};
+
+struct FastNmsExpectedResults
+{
+ static const FloatData s_DetectionBoxes;
+ static const FloatData s_DetectionScores;
+ static const FloatData s_DetectionClasses;
+ static const FloatData s_NumDetections;
+};
+
+const armnn::TensorShape TestData::s_BoxEncodingsShape = { 1, 6, 4 };
+const armnn::TensorShape TestData::s_ScoresShape = { 1, 6, 3 };
+const armnn::TensorShape TestData::s_AnchorsShape = { 6, 4 };
+
+const QuantData TestData::s_BoxEncodingsQuantData = { 1.00f, 1 };
+const QuantData TestData::s_ScoresQuantData = { 0.01f, 0 };
+const QuantData TestData::s_AnchorsQuantData = { 0.50f, 0 };
+
+const FloatData TestData::s_BoxEncodings =
+{
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f
+};
+
+const FloatData TestData::s_Scores =
+{
+ 0.0f, 0.90f, 0.80f,
+ 0.0f, 0.75f, 0.72f,
+ 0.0f, 0.60f, 0.50f,
+ 0.0f, 0.93f, 0.95f,
+ 0.0f, 0.50f, 0.40f,
+ 0.0f, 0.30f, 0.20f
+};
+
+const FloatData TestData::s_Anchors =
+{
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 0.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 10.5f, 1.0f, 1.0f,
+ 0.5f, 100.5f, 1.0f, 1.0f
+};
+
+const FloatData RegularNmsExpectedResults::s_DetectionBoxes =
+{
+ 0.0f, 10.0f, 1.0f, 11.0f,
+ 0.0f, 10.0f, 1.0f, 11.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f
+};
+
+const FloatData RegularNmsExpectedResults::s_DetectionScores =
+{
+ 0.95f, 0.93f, 0.0f
+};
+
+const FloatData RegularNmsExpectedResults::s_DetectionClasses =
+{
+ 1.0f, 0.0f, 0.0f
+};
+
+const FloatData RegularNmsExpectedResults::s_NumDetections = { 2.0f };
+
+const FloatData FastNmsExpectedResults::s_DetectionBoxes =
+{
+ 0.0f, 10.0f, 1.0f, 11.0f,
+ 0.0f, 0.0f, 1.0f, 1.0f,
+ 0.0f, 100.0f, 1.0f, 101.0f
+};
+
+const FloatData FastNmsExpectedResults::s_DetectionScores =
+{
+ 0.95f, 0.9f, 0.3f
+};
+
+const FloatData FastNmsExpectedResults::s_DetectionClasses =
+{
+ 1.0f, 0.0f, 0.0f
+};
+
+const FloatData FastNmsExpectedResults::s_NumDetections = { 3.0f };
+
+} // anonymous namespace
+
+template<typename FactoryType,
+ armnn::DataType ArmnnType,
+ typename T = armnn::ResolveType<ArmnnType>>
void DetectionPostProcessImpl(const armnn::TensorInfo& boxEncodingsInfo,
const armnn::TensorInfo& scoresInfo,
const armnn::TensorInfo& anchorsInfo,
@@ -110,254 +227,140 @@ void DetectionPostProcessImpl(const armnn::TensorInfo& boxEncodingsInfo,
BOOST_TEST(CompareTensors(numDetectionsResult.output, numDetectionsResult.outputExpected));
}
-inline void QuantizeData(uint8_t* quant, const float* dequant, const armnn::TensorInfo& info)
+template<armnn::DataType QuantizedType, typename RawType = armnn::ResolveType<QuantizedType>>
+void QuantizeData(RawType* quant, const float* dequant, const armnn::TensorInfo& info)
{
for (size_t i = 0; i < info.GetNumElements(); i++)
{
- quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
+ quant[i] = armnn::Quantize<RawType>(
+ dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
}
}
-template <typename FactoryType>
+template<typename FactoryType>
void DetectionPostProcessRegularNmsFloatTest()
{
- armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32);
- armnn::TensorInfo scoresInfo({ 1, 6, 3}, armnn::DataType::Float32);
- armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32);
-
- std::vector<float> boxEncodingsData({
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, -1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f
- });
- std::vector<float> scoresData({
- 0.0f, 0.9f, 0.8f,
- 0.0f, 0.75f, 0.72f,
- 0.0f, 0.6f, 0.5f,
- 0.0f, 0.93f, 0.95f,
- 0.0f, 0.5f, 0.4f,
- 0.0f, 0.3f, 0.2f
- });
- std::vector<float> anchorsData({
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 100.5f, 1.0f, 1.0f
- });
-
- std::vector<float> expectedDetectionBoxes({
- 0.0f, 10.0f, 1.0f, 11.0f,
- 0.0f, 10.0f, 1.0f, 11.0f,
- 0.0f, 0.0f, 0.0f, 0.0f
- });
- std::vector<float> expectedDetectionScores({ 0.95f, 0.93f, 0.0f });
- std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f });
- std::vector<float> expectedNumDetections({ 2.0f });
-
- return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>(boxEncodingsInfo,
- scoresInfo,
- anchorsInfo,
- boxEncodingsData,
- scoresData,
- anchorsData,
- expectedDetectionBoxes,
- expectedDetectionClasses,
- expectedDetectionScores,
- expectedNumDetections,
- true);
+ return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>(
+ armnn::TensorInfo(TestData::s_BoxEncodingsShape, armnn::DataType::Float32),
+ armnn::TensorInfo(TestData::s_ScoresShape, armnn::DataType::Float32),
+ armnn::TensorInfo(TestData::s_AnchorsShape, armnn::DataType::Float32),
+ TestData::s_BoxEncodings,
+ TestData::s_Scores,
+ TestData::s_Anchors,
+ RegularNmsExpectedResults::s_DetectionBoxes,
+ RegularNmsExpectedResults::s_DetectionClasses,
+ RegularNmsExpectedResults::s_DetectionScores,
+ RegularNmsExpectedResults::s_NumDetections,
+ true);
}
-template <typename FactoryType>
-void DetectionPostProcessRegularNmsUint8Test()
+template<typename FactoryType,
+ armnn::DataType QuantizedType,
+ typename RawType = armnn::ResolveType<QuantizedType>>
+void DetectionPostProcessRegularNmsQuantizedTest()
{
- armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::QuantisedAsymm8);
- armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::QuantisedAsymm8);
- armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::QuantisedAsymm8);
-
- boxEncodingsInfo.SetQuantizationScale(1.0f);
- boxEncodingsInfo.SetQuantizationOffset(1);
- scoresInfo.SetQuantizationScale(0.01f);
- scoresInfo.SetQuantizationOffset(0);
- anchorsInfo.SetQuantizationScale(0.5f);
- anchorsInfo.SetQuantizationOffset(0);
-
- std::vector<float> boxEncodings({
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, -1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f
- });
- std::vector<float> scores({
- 0.0f, 0.9f, 0.8f,
- 0.0f, 0.75f, 0.72f,
- 0.0f, 0.6f, 0.5f,
- 0.0f, 0.93f, 0.95f,
- 0.0f, 0.5f, 0.4f,
- 0.0f, 0.3f, 0.2f
- });
- std::vector<float> anchors({
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 100.5f, 1.0f, 1.0f
- });
-
- std::vector<uint8_t> boxEncodingsData(boxEncodings.size(), 0);
- std::vector<uint8_t> scoresData(scores.size(), 0);
- std::vector<uint8_t> anchorsData(anchors.size(), 0);
- QuantizeData(boxEncodingsData.data(), boxEncodings.data(), boxEncodingsInfo);
- QuantizeData(scoresData.data(), scores.data(), scoresInfo);
- QuantizeData(anchorsData.data(), anchors.data(), anchorsInfo);
-
- std::vector<float> expectedDetectionBoxes({
- 0.0f, 10.0f, 1.0f, 11.0f,
- 0.0f, 10.0f, 1.0f, 11.0f,
- 0.0f, 0.0f, 0.0f, 0.0f
- });
- std::vector<float> expectedDetectionScores({ 0.95f, 0.93f, 0.0f });
- std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f });
- std::vector<float> expectedNumDetections({ 2.0f });
-
- return DetectionPostProcessImpl<FactoryType, armnn::DataType::QuantisedAsymm8>(boxEncodingsInfo,
- scoresInfo,
- anchorsInfo,
- boxEncodingsData,
- scoresData,
- anchorsData,
- expectedDetectionBoxes,
- expectedDetectionClasses,
- expectedDetectionScores,
- expectedNumDetections,
- true);
+ armnn::TensorInfo boxEncodingsInfo(TestData::s_BoxEncodingsShape, QuantizedType);
+ armnn::TensorInfo scoresInfo(TestData::s_ScoresShape, QuantizedType);
+ armnn::TensorInfo anchorsInfo(TestData::s_AnchorsShape, QuantizedType);
+
+ boxEncodingsInfo.SetQuantizationScale(TestData::s_BoxEncodingsQuantData.first);
+ boxEncodingsInfo.SetQuantizationOffset(TestData::s_BoxEncodingsQuantData.second);
+
+ scoresInfo.SetQuantizationScale(TestData::s_ScoresQuantData.first);
+ scoresInfo.SetQuantizationOffset(TestData::s_ScoresQuantData.second);
+
+ anchorsInfo.SetQuantizationScale(TestData::s_AnchorsQuantData.first);
+    anchorsInfo.SetQuantizationOffset(TestData::s_AnchorsQuantData.second);
+
+ std::vector<RawType> boxEncodingsData(TestData::s_BoxEncodingsShape.GetNumElements());
+ QuantizeData<QuantizedType>(boxEncodingsData.data(),
+ TestData::s_BoxEncodings.data(),
+ boxEncodingsInfo);
+
+ std::vector<RawType> scoresData(TestData::s_ScoresShape.GetNumElements());
+ QuantizeData<QuantizedType>(scoresData.data(),
+ TestData::s_Scores.data(),
+ scoresInfo);
+
+ std::vector<RawType> anchorsData(TestData::s_AnchorsShape.GetNumElements());
+ QuantizeData<QuantizedType>(anchorsData.data(),
+ TestData::s_Anchors.data(),
+ anchorsInfo);
+
+ return DetectionPostProcessImpl<FactoryType, QuantizedType>(
+ boxEncodingsInfo,
+ scoresInfo,
+ anchorsInfo,
+ boxEncodingsData,
+ scoresData,
+ anchorsData,
+ RegularNmsExpectedResults::s_DetectionBoxes,
+ RegularNmsExpectedResults::s_DetectionClasses,
+ RegularNmsExpectedResults::s_DetectionScores,
+ RegularNmsExpectedResults::s_NumDetections,
+ true);
}
-template <typename FactoryType>
+template<typename FactoryType>
void DetectionPostProcessFastNmsFloatTest()
{
- armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32);
- armnn::TensorInfo scoresInfo({ 1, 6, 3}, armnn::DataType::Float32);
- armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32);
-
- std::vector<float> boxEncodingsData({
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, -1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f
- });
- std::vector<float> scoresData({
- 0.0f, 0.9f, 0.8f,
- 0.0f, 0.75f, 0.72f,
- 0.0f, 0.6f, 0.5f,
- 0.0f, 0.93f, 0.95f,
- 0.0f, 0.5f, 0.4f,
- 0.0f, 0.3f, 0.2f
- });
- std::vector<float> anchorsData({
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 100.5f, 1.0f, 1.0f
- });
-
- std::vector<float> expectedDetectionBoxes({
- 0.0f, 10.0f, 1.0f, 11.0f,
- 0.0f, 0.0f, 1.0f, 1.0f,
- 0.0f, 100.0f, 1.0f, 101.0f
- });
- std::vector<float> expectedDetectionScores({ 0.95f, 0.9f, 0.3f });
- std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f });
- std::vector<float> expectedNumDetections({ 3.0f });
-
- return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>(boxEncodingsInfo,
- scoresInfo,
- anchorsInfo,
- boxEncodingsData,
- scoresData,
- anchorsData,
- expectedDetectionBoxes,
- expectedDetectionClasses,
- expectedDetectionScores,
- expectedNumDetections,
- false);
+ return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>(
+ armnn::TensorInfo(TestData::s_BoxEncodingsShape, armnn::DataType::Float32),
+ armnn::TensorInfo(TestData::s_ScoresShape, armnn::DataType::Float32),
+ armnn::TensorInfo(TestData::s_AnchorsShape, armnn::DataType::Float32),
+ TestData::s_BoxEncodings,
+ TestData::s_Scores,
+ TestData::s_Anchors,
+ FastNmsExpectedResults::s_DetectionBoxes,
+ FastNmsExpectedResults::s_DetectionClasses,
+ FastNmsExpectedResults::s_DetectionScores,
+ FastNmsExpectedResults::s_NumDetections,
+ false);
}
-template <typename FactoryType>
-void DetectionPostProcessFastNmsUint8Test()
+template<typename FactoryType,
+ armnn::DataType QuantizedType,
+ typename RawType = armnn::ResolveType<QuantizedType>>
+void DetectionPostProcessFastNmsQuantizedTest()
{
- armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::QuantisedAsymm8);
- armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::QuantisedAsymm8);
- armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::QuantisedAsymm8);
-
- boxEncodingsInfo.SetQuantizationScale(1.0f);
- boxEncodingsInfo.SetQuantizationOffset(1);
- scoresInfo.SetQuantizationScale(0.01f);
- scoresInfo.SetQuantizationOffset(0);
- anchorsInfo.SetQuantizationScale(0.5f);
- anchorsInfo.SetQuantizationOffset(0);
-
- std::vector<float> boxEncodings({
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, -1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f
- });
- std::vector<float> scores({
- 0.0f, 0.9f, 0.8f,
- 0.0f, 0.75f, 0.72f,
- 0.0f, 0.6f, 0.5f,
- 0.0f, 0.93f, 0.95f,
- 0.0f, 0.5f, 0.4f,
- 0.0f, 0.3f, 0.2f
- });
- std::vector<float> anchors({
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 0.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 10.5f, 1.0f, 1.0f,
- 0.5f, 100.5f, 1.0f, 1.0f
- });
-
- std::vector<uint8_t> boxEncodingsData(boxEncodings.size(), 0);
- std::vector<uint8_t> scoresData(scores.size(), 0);
- std::vector<uint8_t> anchorsData(anchors.size(), 0);
- QuantizeData(boxEncodingsData.data(), boxEncodings.data(), boxEncodingsInfo);
- QuantizeData(scoresData.data(), scores.data(), scoresInfo);
- QuantizeData(anchorsData.data(), anchors.data(), anchorsInfo);
-
- std::vector<float> expectedDetectionBoxes({
- 0.0f, 10.0f, 1.0f, 11.0f,
- 0.0f, 0.0f, 1.0f, 1.0f,
- 0.0f, 100.0f, 1.0f, 101.0f
- });
- std::vector<float> expectedDetectionScores({ 0.95f, 0.9f, 0.3f });
- std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f });
- std::vector<float> expectedNumDetections({ 3.0f });
-
- return DetectionPostProcessImpl<FactoryType, armnn::DataType::QuantisedAsymm8>(boxEncodingsInfo,
- scoresInfo,
- anchorsInfo,
- boxEncodingsData,
- scoresData,
- anchorsData,
- expectedDetectionBoxes,
- expectedDetectionClasses,
- expectedDetectionScores,
- expectedNumDetections,
- false);
-}
+ armnn::TensorInfo boxEncodingsInfo(TestData::s_BoxEncodingsShape, QuantizedType);
+ armnn::TensorInfo scoresInfo(TestData::s_ScoresShape, QuantizedType);
+ armnn::TensorInfo anchorsInfo(TestData::s_AnchorsShape, QuantizedType);
+
+ boxEncodingsInfo.SetQuantizationScale(TestData::s_BoxEncodingsQuantData.first);
+ boxEncodingsInfo.SetQuantizationOffset(TestData::s_BoxEncodingsQuantData.second);
+
+ scoresInfo.SetQuantizationScale(TestData::s_ScoresQuantData.first);
+ scoresInfo.SetQuantizationOffset(TestData::s_ScoresQuantData.second);
+
+ anchorsInfo.SetQuantizationScale(TestData::s_AnchorsQuantData.first);
+    anchorsInfo.SetQuantizationOffset(TestData::s_AnchorsQuantData.second);
+
+ std::vector<RawType> boxEncodingsData(TestData::s_BoxEncodingsShape.GetNumElements());
+ QuantizeData<QuantizedType>(boxEncodingsData.data(),
+ TestData::s_BoxEncodings.data(),
+ boxEncodingsInfo);
+
+ std::vector<RawType> scoresData(TestData::s_ScoresShape.GetNumElements());
+ QuantizeData<QuantizedType>(scoresData.data(),
+ TestData::s_Scores.data(),
+ scoresInfo);
+
+ std::vector<RawType> anchorsData(TestData::s_AnchorsShape.GetNumElements());
+ QuantizeData<QuantizedType>(anchorsData.data(),
+ TestData::s_Anchors.data(),
+ anchorsInfo);
+
+ return DetectionPostProcessImpl<FactoryType, QuantizedType>(
+ boxEncodingsInfo,
+ scoresInfo,
+ anchorsInfo,
+ boxEncodingsData,
+ scoresData,
+ anchorsData,
+ FastNmsExpectedResults::s_DetectionBoxes,
+ FastNmsExpectedResults::s_DetectionClasses,
+ FastNmsExpectedResults::s_DetectionScores,
+ FastNmsExpectedResults::s_NumDetections,
+ false);
+} \ No newline at end of file
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index d103f56c23..5e247b2f4f 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -179,16 +179,7 @@ std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDetectionPostProcess(
const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const
{
- const DataType dataType = info.m_InputTensorInfos[0].GetDataType();
- switch (dataType)
- {
- case DataType::Float32:
- return std::make_unique<RefDetectionPostProcessFloat32Workload>(descriptor, info);
- case DataType::QuantisedAsymm8:
- return std::make_unique<RefDetectionPostProcessUint8Workload>(descriptor, info);
- default:
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
- }
+ return std::make_unique<RefDetectionPostProcessWorkload>(descriptor, info);
}
std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateNormalization(
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 81b6de18e4..edf1431cfd 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -37,8 +37,7 @@ BACKEND_SOURCES := \
workloads/RefDebugWorkload.cpp \
workloads/RefDepthwiseConvolution2dWorkload.cpp \
workloads/RefDequantizeWorkload.cpp \
- workloads/RefDetectionPostProcessFloat32Workload.cpp \
- workloads/RefDetectionPostProcessUint8Workload.cpp \
+ workloads/RefDetectionPostProcessWorkload.cpp \
workloads/RefElementwiseWorkload.cpp \
workloads/RefFakeQuantizationFloat32Workload.cpp \
workloads/RefFloorWorkload.cpp \
diff --git a/src/backends/reference/test/RefDetectionPostProcessTests.cpp b/src/backends/reference/test/RefDetectionPostProcessTests.cpp
index a9faff70b1..fab6e00bad 100644
--- a/src/backends/reference/test/RefDetectionPostProcessTests.cpp
+++ b/src/backends/reference/test/RefDetectionPostProcessTests.cpp
@@ -3,7 +3,7 @@
// SPDX-License-Identifier: MIT
//
-#include "reference/workloads/DetectionPostProcess.cpp"
+#include <reference/workloads/DetectionPostProcess.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>
@@ -12,13 +12,12 @@
BOOST_AUTO_TEST_SUITE(RefDetectionPostProcess)
-
BOOST_AUTO_TEST_CASE(TopKSortTest)
{
unsigned int k = 3;
unsigned int indices[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
float values[8] = { 0, 7, 6, 5, 4, 3, 2, 500 };
- TopKSort(k, indices, values, 8);
+ armnn::TopKSort(k, indices, values, 8);
BOOST_TEST(indices[0] == 7);
BOOST_TEST(indices[1] == 1);
BOOST_TEST(indices[2] == 2);
@@ -29,7 +28,7 @@ BOOST_AUTO_TEST_CASE(FullTopKSortTest)
unsigned int k = 8;
unsigned int indices[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
float values[8] = { 0, 7, 6, 5, 4, 3, 2, 500 };
- TopKSort(k, indices, values, 8);
+ armnn::TopKSort(k, indices, values, 8);
BOOST_TEST(indices[0] == 7);
BOOST_TEST(indices[1] == 1);
BOOST_TEST(indices[2] == 2);
@@ -44,7 +43,7 @@ BOOST_AUTO_TEST_CASE(IouTest)
{
float boxI[4] = { 0.0f, 0.0f, 10.0f, 10.0f };
float boxJ[4] = { 1.0f, 1.0f, 11.0f, 11.0f };
- float iou = IntersectionOverUnion(boxI, boxJ);
+ float iou = armnn::IntersectionOverUnion(boxI, boxJ);
BOOST_TEST(iou == 0.68, boost::test_tools::tolerance(0.001));
}
@@ -61,14 +60,17 @@ BOOST_AUTO_TEST_CASE(NmsFunction)
std::vector<float> scores({ 0.9f, 0.75f, 0.6f, 0.93f, 0.5f, 0.3f });
- std::vector<unsigned int> result = NonMaxSuppression(6, boxCorners, scores, 0.0, 3, 0.5);
+ std::vector<unsigned int> result =
+ armnn::NonMaxSuppression(6, boxCorners, scores, 0.0, 3, 0.5);
+
BOOST_TEST(result.size() == 3);
BOOST_TEST(result[0] == 3);
BOOST_TEST(result[1] == 0);
BOOST_TEST(result[2] == 5);
}
-void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>& expectedDetectionBoxes,
+void DetectionPostProcessTestImpl(bool useRegularNms,
+ const std::vector<float>& expectedDetectionBoxes,
const std::vector<float>& expectedDetectionClasses,
const std::vector<float>& expectedDetectionScores,
const std::vector<float>& expectedNumDetections)
@@ -103,6 +105,7 @@ void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>&
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f
});
+
std::vector<float> scores({
0.0f, 0.9f, 0.8f,
0.0f, 0.75f, 0.72f,
@@ -111,6 +114,7 @@ void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>&
0.0f, 0.5f, 0.4f,
0.0f, 0.3f, 0.2f
});
+
std::vector<float> anchors({
0.5f, 0.5f, 1.0f, 1.0f,
0.5f, 0.5f, 1.0f, 1.0f,
@@ -120,22 +124,50 @@ void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>&
0.5f, 100.5f, 1.0f, 1.0f
});
+ auto boxEncodingsDecoder = armnn::MakeDecoder<float>(boxEncodingsInfo, boxEncodings.data());
+ auto scoresDecoder = armnn::MakeDecoder<float>(scoresInfo, scores.data());
+ auto anchorsDecoder = armnn::MakeDecoder<float>(anchorsInfo, anchors.data());
+
std::vector<float> detectionBoxes(detectionBoxesInfo.GetNumElements());
std::vector<float> detectionScores(detectionScoresInfo.GetNumElements());
std::vector<float> detectionClasses(detectionClassesInfo.GetNumElements());
std::vector<float> numDetections(1);
- armnn::DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
- detectionBoxesInfo, detectionClassesInfo,
- detectionScoresInfo, numDetectionInfo, desc,
- boxEncodings.data(), scores.data(), anchors.data(),
- detectionBoxes.data(), detectionClasses.data(),
- detectionScores.data(), numDetections.data());
-
- BOOST_TEST(detectionBoxes == expectedDetectionBoxes);
- BOOST_TEST(detectionScores == expectedDetectionScores);
- BOOST_TEST(detectionClasses == expectedDetectionClasses);
- BOOST_TEST(numDetections == expectedNumDetections);
+ armnn::DetectionPostProcess(boxEncodingsInfo,
+ scoresInfo,
+ anchorsInfo,
+ detectionBoxesInfo,
+ detectionClassesInfo,
+ detectionScoresInfo,
+ numDetectionInfo,
+ desc,
+ *boxEncodingsDecoder,
+ *scoresDecoder,
+ *anchorsDecoder,
+ detectionBoxes.data(),
+ detectionClasses.data(),
+ detectionScores.data(),
+ numDetections.data());
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(detectionBoxes.begin(),
+ detectionBoxes.end(),
+ expectedDetectionBoxes.begin(),
+ expectedDetectionBoxes.end());
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(detectionScores.begin(),
+ detectionScores.end(),
+ expectedDetectionScores.begin(),
+ expectedDetectionScores.end());
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(detectionClasses.begin(),
+ detectionClasses.end(),
+ expectedDetectionClasses.begin(),
+ expectedDetectionClasses.end());
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(numDetections.begin(),
+ numDetections.end(),
+ expectedNumDetections.begin(),
+ expectedNumDetections.end());
}
BOOST_AUTO_TEST_CASE(RegularNmsDetectionPostProcess)
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index b2f71a8920..f54a8d067c 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -624,11 +624,23 @@ BOOST_AUTO_TEST_CASE(DetectionPostProcessFastNmsFloat)
}
BOOST_AUTO_TEST_CASE(DetectionPostProcessRegularNmsUint8)
{
- DetectionPostProcessRegularNmsUint8Test<armnn::RefWorkloadFactory>();
+ DetectionPostProcessRegularNmsQuantizedTest<
+ armnn::RefWorkloadFactory, armnn::DataType::QuantisedAsymm8>();
}
BOOST_AUTO_TEST_CASE(DetectionPostProcessFastNmsUint8)
{
- DetectionPostProcessFastNmsUint8Test<armnn::RefWorkloadFactory>();
+    DetectionPostProcessFastNmsQuantizedTest<
+        armnn::RefWorkloadFactory, armnn::DataType::QuantisedAsymm8>();
+}
+BOOST_AUTO_TEST_CASE(DetectionPostProcessRegularNmsInt16)
+{
+ DetectionPostProcessRegularNmsQuantizedTest<
+ armnn::RefWorkloadFactory, armnn::DataType::QuantisedSymm16>();
+}
+BOOST_AUTO_TEST_CASE(DetectionPostProcessFastNmsInt16)
+{
+ DetectionPostProcessFastNmsQuantizedTest<
+ armnn::RefWorkloadFactory, armnn::DataType::QuantisedSymm16>();
}
// Dequantize
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index cdca22da31..25d4b28366 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -60,10 +60,8 @@ list(APPEND armnnRefBackendWorkloads_sources
RefDepthwiseConvolution2dWorkload.hpp
RefDequantizeWorkload.cpp
RefDequantizeWorkload.hpp
- RefDetectionPostProcessUint8Workload.cpp
- RefDetectionPostProcessUint8Workload.hpp
- RefDetectionPostProcessFloat32Workload.cpp
- RefDetectionPostProcessFloat32Workload.hpp
+ RefDetectionPostProcessWorkload.cpp
+ RefDetectionPostProcessWorkload.hpp
RefFakeQuantizationFloat32Workload.cpp
RefFakeQuantizationFloat32Workload.hpp
RefFloorWorkload.cpp
diff --git a/src/backends/reference/workloads/DetectionPostProcess.cpp b/src/backends/reference/workloads/DetectionPostProcess.cpp
index d3790f22ab..d475dd8ac0 100644
--- a/src/backends/reference/workloads/DetectionPostProcess.cpp
+++ b/src/backends/reference/workloads/DetectionPostProcess.cpp
@@ -13,7 +13,7 @@
#include <algorithm>
#include <numeric>
-namespace
+namespace armnn
{
std::vector<unsigned int> GenerateRangeK(unsigned int k)
@@ -48,9 +48,12 @@ float IntersectionOverUnion(const float* boxI, const float* boxJ)
return areaIntersection / areaUnion;
}
-std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, const std::vector<float>& boxCorners,
- const std::vector<float>& scores, float nmsScoreThreshold,
- unsigned int maxDetection, float nmsIouThreshold)
+std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes,
+ const std::vector<float>& boxCorners,
+ const std::vector<float>& scores,
+ float nmsScoreThreshold,
+ unsigned int maxDetection,
+ float nmsIouThreshold)
{
// Select boxes that have scores above a given threshold.
std::vector<float> scoresAboveThreshold;
@@ -67,7 +70,7 @@ std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, const std::ve
// Sort the indices based on scores.
unsigned int numAboveThreshold = boost::numeric_cast<unsigned int>(scoresAboveThreshold.size());
std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
- TopKSort(numAboveThreshold,sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
+ TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
// Number of output cannot be more than max detections specified in the option.
unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
@@ -98,10 +101,17 @@ std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, const std::ve
return outputIndices;
}
-void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector<float>& boxCorners,
- const std::vector<unsigned int>& outputIndices, const std::vector<unsigned int>& selectedBoxes,
- const std::vector<unsigned int>& selectedClasses, const std::vector<float>& selectedScores,
- float* detectionBoxes, float* detectionScores, float* detectionClasses, float* numDetections)
+void AllocateOutputData(unsigned int numOutput,
+ unsigned int numSelected,
+ const std::vector<float>& boxCorners,
+ const std::vector<unsigned int>& outputIndices,
+ const std::vector<unsigned int>& selectedBoxes,
+ const std::vector<unsigned int>& selectedClasses,
+ const std::vector<float>& selectedScores,
+ float* detectionBoxes,
+ float* detectionScores,
+ float* detectionClasses,
+ float* numDetections)
{
for (unsigned int i = 0; i < numOutput; ++i)
{
@@ -129,11 +139,6 @@ void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const
numDetections[0] = boost::numeric_cast<float>(numSelected);
}
-} // anonymous namespace
-
-namespace armnn
-{
-
void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
const TensorInfo& scoresInfo,
const TensorInfo& anchorsInfo,
@@ -142,9 +147,9 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
const TensorInfo& detectionScoresInfo,
const TensorInfo& numDetectionsInfo,
const DetectionPostProcessDescriptor& desc,
- const float* boxEncodings,
- const float* scores,
- const float* anchors,
+ Decoder<float>& boxEncodings,
+ Decoder<float>& scores,
+ Decoder<float>& anchors,
float* detectionBoxes,
float* detectionClasses,
float* detectionScores,
@@ -153,17 +158,51 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
// Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
// which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
- unsigned int numBoxes = boxEncodingsInfo.GetShape()[1];
+
+ const unsigned int numBoxes = boxEncodingsInfo.GetShape()[1];
+ const unsigned int numScores = scoresInfo.GetNumElements();
+
for (unsigned int i = 0; i < numBoxes; ++i)
{
+ // Y
+ float boxEncodingY = boxEncodings.Get();
+ float anchorY = anchors.Get();
+
+ ++boxEncodings;
+ ++anchors;
+
+ // X
+ float boxEncodingX = boxEncodings.Get();
+ float anchorX = anchors.Get();
+
+ ++boxEncodings;
+ ++anchors;
+
+ // H
+ float boxEncodingH = boxEncodings.Get();
+ float anchorH = anchors.Get();
+
+ ++boxEncodings;
+ ++anchors;
+
+ // W
+ float boxEncodingW = boxEncodings.Get();
+ float anchorW = anchors.Get();
+
+ ++boxEncodings;
+ ++anchors;
+
+ float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY;
+ float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX;
+
+ float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH;
+ float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW;
+
unsigned int indexY = i * 4;
unsigned int indexX = indexY + 1;
unsigned int indexH = indexX + 1;
unsigned int indexW = indexH + 1;
- float yCentre = boxEncodings[indexY] / desc.m_ScaleY * anchors[indexH] + anchors[indexY];
- float xCentre = boxEncodings[indexX] / desc.m_ScaleX * anchors[indexW] + anchors[indexX];
- float halfH = 0.5f * expf(boxEncodings[indexH] / desc.m_ScaleH) * anchors[indexH];
- float halfW = 0.5f * expf(boxEncodings[indexW] / desc.m_ScaleW) * anchors[indexW];
+
// ymin
boxCorners[indexY] = yCentre - halfH;
// xmin
@@ -179,14 +218,29 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
unsigned int numClassesWithBg = desc.m_NumClasses + 1;
+ // Decode scores
+ std::vector<float> decodedScores;
+ decodedScores.reserve(numScores);
+
+ for (unsigned int i = 0u; i < numScores; ++i)
+ {
+ decodedScores.emplace_back(scores.Get());
+ ++scores;
+ }
+
// Perform Non Max Suppression.
if (desc.m_UseRegularNms)
{
// Perform Regular NMS.
// For each class, perform NMS and select max detection numbers of the highest score across all classes.
std::vector<float> classScores(numBoxes);
- std::vector<unsigned int>selectedBoxesAfterNms;
+
+ std::vector<unsigned int> selectedBoxesAfterNms;
+ selectedBoxesAfterNms.reserve(numBoxes);
+
std::vector<float> selectedScoresAfterNms;
+ selectedScoresAfterNms.reserve(numScores);
+
std::vector<unsigned int> selectedClasses;
for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
@@ -194,9 +248,11 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
// For each boxes, get scores of the boxes for the class c.
for (unsigned int i = 0; i < numBoxes; ++i)
{
- classScores[i] = scores[i * numClassesWithBg + c + 1];
+ classScores[i] = decodedScores[i * numClassesWithBg + c + 1];
}
- std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, classScores,
+ std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes,
+ boxCorners,
+ classScores,
desc.m_NmsScoreThreshold,
desc.m_DetectionsPerClass,
desc.m_NmsIouThreshold);
@@ -237,11 +293,12 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
// Get the max scores of the box.
std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
- TopKSort(numClassesPerBox, maxScoreIndices.data(), scores + scoreIndex, desc.m_NumClasses);
+ TopKSort(numClassesPerBox, maxScoreIndices.data(),
+ decodedScores.data() + scoreIndex, desc.m_NumClasses);
for (unsigned int i = 0; i < numClassesPerBox; ++i)
{
- maxScores.push_back(scores[scoreIndex + maxScoreIndices[i]]);
+ maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]);
maxScoreClasses.push_back(maxScoreIndices[i]);
boxIndices.push_back(box);
}
diff --git a/src/backends/reference/workloads/DetectionPostProcess.hpp b/src/backends/reference/workloads/DetectionPostProcess.hpp
index 06e9e15781..8700a53317 100644
--- a/src/backends/reference/workloads/DetectionPostProcess.hpp
+++ b/src/backends/reference/workloads/DetectionPostProcess.hpp
@@ -7,6 +7,10 @@
#include "armnn/Tensor.hpp"
#include "armnn/Descriptors.hpp"
+#include "Decoders.hpp"
+
+#include <vector>
+
namespace armnn
{
@@ -18,12 +22,26 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
const TensorInfo& detectionScoresInfo,
const TensorInfo& numDetectionsInfo,
const DetectionPostProcessDescriptor& desc,
- const float* boxEncodings,
- const float* scores,
- const float* anchors,
+ Decoder<float>& boxEncodings,
+ Decoder<float>& scores,
+ Decoder<float>& anchors,
float* detectionBoxes,
float* detectionClasses,
float* detectionScores,
float* numDetections);
+void TopKSort(unsigned int k,
+ unsigned int* indices,
+ const float* values,
+ unsigned int numElement);
+
+float IntersectionOverUnion(const float* boxI, const float* boxJ);
+
+std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes,
+ const std::vector<float>& boxCorners,
+ const std::vector<float>& scores,
+ float nmsScoreThreshold,
+ unsigned int maxDetection,
+ float nmsIouThreshold);
+
} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp
deleted file mode 100644
index ddab046f9c..0000000000
--- a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefDetectionPostProcessFloat32Workload.hpp"
-
-#include "DetectionPostProcess.hpp"
-#include "Profiling.hpp"
-#include "RefWorkloadUtils.hpp"
-
-namespace armnn
-{
-
-RefDetectionPostProcessFloat32Workload::RefDetectionPostProcessFloat32Workload(
- const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info)
- : Float32Workload<DetectionPostProcessQueueDescriptor>(descriptor, info),
- m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {}
-
-void RefDetectionPostProcessFloat32Workload::Execute() const
-{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute");
-
- const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get());
- const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]);
- const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]);
- const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]);
-
- const float* boxEncodings = GetInputTensorDataFloat(0, m_Data);
- const float* scores = GetInputTensorDataFloat(1, m_Data);
- const float* anchors = m_Anchors->GetConstTensor<float>();
-
- float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
- float* detectionClasses = GetOutputTensorData<float>(1, m_Data);
- float* detectionScores = GetOutputTensorData<float>(2, m_Data);
- float* numDetections = GetOutputTensorData<float>(3, m_Data);
-
- DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
- detectionBoxesInfo, detectionClassesInfo,
- detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters,
- boxEncodings, scores, anchors, detectionBoxes,
- detectionClasses, detectionScores, numDetections);
-}
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp
deleted file mode 100644
index ccdaf87c9a..0000000000
--- a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefDetectionPostProcessUint8Workload.hpp"
-
-#include "DetectionPostProcess.hpp"
-#include "Profiling.hpp"
-#include "RefWorkloadUtils.hpp"
-
-namespace armnn
-{
-
-RefDetectionPostProcessUint8Workload::RefDetectionPostProcessUint8Workload(
- const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info)
- : Uint8ToFloat32Workload<DetectionPostProcessQueueDescriptor>(descriptor, info),
- m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {}
-
-void RefDetectionPostProcessUint8Workload::Execute() const
-{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute");
-
- const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]);
- const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get());
- const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]);
- const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]);
- const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]);
- const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]);
-
- const uint8_t* boxEncodingsData = GetInputTensorDataU8(0, m_Data);
- const uint8_t* scoresData = GetInputTensorDataU8(1, m_Data);
- const uint8_t* anchorsData = m_Anchors->GetConstTensor<uint8_t>();
-
- auto boxEncodings = Dequantize(boxEncodingsData, boxEncodingsInfo);
- auto scores = Dequantize(scoresData, scoresInfo);
- auto anchors = Dequantize(anchorsData, anchorsInfo);
-
- float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
- float* detectionClasses = GetOutputTensorData<float>(1, m_Data);
- float* detectionScores = GetOutputTensorData<float>(2, m_Data);
- float* numDetections = GetOutputTensorData<float>(3, m_Data);
-
- DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
- detectionBoxesInfo, detectionClassesInfo,
- detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters,
- boxEncodings.data(), scores.data(), anchors.data(),
- detectionBoxes, detectionClasses, detectionScores, numDetections);
-}
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp
deleted file mode 100644
index 91590f57bd..0000000000
--- a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <backendsCommon/Workload.hpp>
-#include <backendsCommon/WorkloadData.hpp>
-
-namespace armnn
-{
-
-class RefDetectionPostProcessUint8Workload : public Uint8ToFloat32Workload<DetectionPostProcessQueueDescriptor>
-{
-public:
- explicit RefDetectionPostProcessUint8Workload(const DetectionPostProcessQueueDescriptor& descriptor,
- const WorkloadInfo& info);
- virtual void Execute() const override;
-
-private:
- std::unique_ptr<ScopedCpuTensorHandle> m_Anchors;
-};
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
new file mode 100644
index 0000000000..db24cc53e4
--- /dev/null
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefDetectionPostProcessWorkload.hpp"
+
+#include "Decoders.hpp"
+#include "DetectionPostProcess.hpp"
+#include "Profiling.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+RefDetectionPostProcessWorkload::RefDetectionPostProcessWorkload(
+ const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : BaseWorkload<DetectionPostProcessQueueDescriptor>(descriptor, info),
+ m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {}
+
+void RefDetectionPostProcessWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessWorkload_Execute");
+
+ const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get());
+
+ const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+ const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]);
+ const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]);
+ const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]);
+
+ auto boxEncodings = MakeDecoder<float>(boxEncodingsInfo, m_Data.m_Inputs[0]->Map());
+ auto scores = MakeDecoder<float>(scoresInfo, m_Data.m_Inputs[1]->Map());
+ auto anchors = MakeDecoder<float>(anchorsInfo, m_Anchors->Map(false));
+
+ float* detectionBoxes = GetOutputTensorData<float>(0, m_Data);
+ float* detectionClasses = GetOutputTensorData<float>(1, m_Data);
+ float* detectionScores = GetOutputTensorData<float>(2, m_Data);
+ float* numDetections = GetOutputTensorData<float>(3, m_Data);
+
+ DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo,
+ detectionBoxesInfo, detectionClassesInfo,
+ detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters,
+ *boxEncodings, *scores, *anchors, detectionBoxes,
+ detectionClasses, detectionScores, numDetections);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
index 9f2a697ada..799d0c6219 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
@@ -11,11 +11,11 @@
namespace armnn
{
-class RefDetectionPostProcessFloat32Workload : public Float32Workload<DetectionPostProcessQueueDescriptor>
+class RefDetectionPostProcessWorkload : public BaseWorkload<DetectionPostProcessQueueDescriptor>
{
public:
- explicit RefDetectionPostProcessFloat32Workload(const DetectionPostProcessQueueDescriptor& descriptor,
- const WorkloadInfo& info);
+ explicit RefDetectionPostProcessWorkload(const DetectionPostProcessQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
virtual void Execute() const override;
private:
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 7ccd4efc54..8d99b69685 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -31,8 +31,7 @@
#include "RefResizeBilinearFloat32Workload.hpp"
#include "ResizeBilinear.hpp"
#include "RefNormalizationFloat32Workload.hpp"
-#include "RefDetectionPostProcessFloat32Workload.hpp"
-#include "RefDetectionPostProcessUint8Workload.hpp"
+#include "RefDetectionPostProcessWorkload.hpp"
#include "RefPooling2dUint8Workload.hpp"
#include "BatchNormImpl.hpp"
#include "Activation.hpp"