diff options
15 files changed, 502 insertions, 458 deletions
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index a373f55d3e..d0aaf1db38 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -1459,53 +1459,63 @@ void GatherQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const void DetectionPostProcessQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const { - ValidateNumInputs(workloadInfo, "DetectionPostProcessQueueDescriptor", 2); + const std::string& descriptorName = "DetectionPostProcessQueueDescriptor"; + ValidateNumInputs(workloadInfo, descriptorName, 2); if (workloadInfo.m_OutputTensorInfos.size() != 4) { - throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Requires exactly four outputs. " + + throw InvalidArgumentException(descriptorName + ": Requires exactly four outputs. " + to_string(workloadInfo.m_OutputTensorInfos.size()) + " has been provided."); } if (m_Anchors == nullptr) { - throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Anchors tensor descriptor is missing."); + throw InvalidArgumentException(descriptorName + ": Anchors tensor descriptor is missing."); } const TensorInfo& boxEncodingsInfo = workloadInfo.m_InputTensorInfos[0]; - const TensorInfo& scoresInfo = workloadInfo.m_InputTensorInfos[1]; - const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo(); - const TensorInfo& detectionBoxesInfo = workloadInfo.m_OutputTensorInfos[0]; + const TensorInfo& scoresInfo = workloadInfo.m_InputTensorInfos[1]; + const TensorInfo& anchorsInfo = m_Anchors->GetTensorInfo(); + + const TensorInfo& detectionBoxesInfo = workloadInfo.m_OutputTensorInfos[0]; const TensorInfo& detectionClassesInfo = workloadInfo.m_OutputTensorInfos[1]; - const TensorInfo& detectionScoresInfo = workloadInfo.m_OutputTensorInfos[2]; - const TensorInfo& numDetectionsInfo = workloadInfo.m_OutputTensorInfos[3]; - - ValidateTensorNumDimensions(boxEncodingsInfo, 
"DetectionPostProcessQueueDescriptor", 3, "box encodings"); - ValidateTensorNumDimensions(scoresInfo, "DetectionPostProcessQueueDescriptor", 3, "scores"); - ValidateTensorNumDimensions(anchorsInfo, "DetectionPostProcessQueueDescriptor", 2, "anchors"); - - ValidateTensorNumDimensions(detectionBoxesInfo, "DetectionPostProcessQueueDescriptor", 3, "detection boxes"); - ValidateTensorNumDimensions(detectionScoresInfo, "DetectionPostProcessQueueDescriptor", 2, "detection scores"); - ValidateTensorNumDimensions(detectionClassesInfo, "DetectionPostProcessQueueDescriptor", 2, "detection classes"); - ValidateTensorNumDimensions(numDetectionsInfo, "DetectionPostProcessQueueDescriptor", 1, "num detections"); - - ValidateTensorDataType(detectionBoxesInfo, DataType::Float32, - "DetectionPostProcessQueueDescriptor", "detection boxes"); - ValidateTensorDataType(detectionScoresInfo, DataType::Float32, - "DetectionPostProcessQueueDescriptor", "detection scores"); - ValidateTensorDataType(detectionClassesInfo, DataType::Float32, - "DetectionPostProcessQueueDescriptor", "detection classes"); - ValidateTensorDataType(numDetectionsInfo, DataType::Float32, - "DetectionPostProcessQueueDescriptor", "num detections"); + const TensorInfo& detectionScoresInfo = workloadInfo.m_OutputTensorInfos[2]; + const TensorInfo& numDetectionsInfo = workloadInfo.m_OutputTensorInfos[3]; + + ValidateTensorNumDimensions(boxEncodingsInfo, descriptorName, 3, "box encodings"); + ValidateTensorNumDimensions(scoresInfo, descriptorName, 3, "scores"); + ValidateTensorNumDimensions(anchorsInfo, descriptorName, 2, "anchors"); + + const std::vector<DataType> supportedInputTypes = + { + DataType::Float32, + DataType::QuantisedAsymm8, + DataType::QuantisedSymm16 + }; + + ValidateDataTypes(boxEncodingsInfo, supportedInputTypes, descriptorName); + ValidateDataTypes(scoresInfo, supportedInputTypes, descriptorName); + ValidateDataTypes(anchorsInfo, supportedInputTypes, descriptorName); + + 
ValidateTensorNumDimensions(detectionBoxesInfo, descriptorName, 3, "detection boxes"); + ValidateTensorNumDimensions(detectionScoresInfo, descriptorName, 2, "detection scores"); + ValidateTensorNumDimensions(detectionClassesInfo, descriptorName, 2, "detection classes"); + ValidateTensorNumDimensions(numDetectionsInfo, descriptorName, 1, "num detections"); + + // NOTE: Output is always Float32 regardless of input type + ValidateTensorDataType(detectionBoxesInfo, DataType::Float32, descriptorName, "detection boxes"); + ValidateTensorDataType(detectionScoresInfo, DataType::Float32, descriptorName, "detection scores"); + ValidateTensorDataType(detectionClassesInfo, DataType::Float32, descriptorName, "detection classes"); + ValidateTensorDataType(numDetectionsInfo, DataType::Float32, descriptorName, "num detections"); if (m_Parameters.m_NmsIouThreshold <= 0.0f || m_Parameters.m_NmsIouThreshold > 1.0f) { - throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Intersection over union threshold " + throw InvalidArgumentException(descriptorName + ": Intersection over union threshold " "must be positive and less than or equal to 1."); } if (scoresInfo.GetShape()[2] != m_Parameters.m_NumClasses + 1) { - throw InvalidArgumentException("DetectionPostProcessQueueDescriptor: Number of classes with background " + throw InvalidArgumentException(descriptorName + ": Number of classes with background " "should be equal to number of classes + 1."); } } diff --git a/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp b/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp index 092ce26696..2726fdef4c 100644 --- a/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp +++ b/src/backends/backendsCommon/test/DetectionPostProcessLayerTestImpl.hpp @@ -15,7 +15,124 @@ #include <backendsCommon/test/WorkloadFactoryHelper.hpp> #include <test/TensorHelpers.hpp> -template <typename FactoryType, armnn::DataType ArmnnType, typename T 
= armnn::ResolveType<ArmnnType>> +namespace +{ + +using FloatData = std::vector<float>; +using QuantData = std::pair<float, int32_t>; + +struct TestData +{ + static const armnn::TensorShape s_BoxEncodingsShape; + static const armnn::TensorShape s_ScoresShape; + static const armnn::TensorShape s_AnchorsShape; + + static const QuantData s_BoxEncodingsQuantData; + static const QuantData s_ScoresQuantData; + static const QuantData s_AnchorsQuantData; + + static const FloatData s_BoxEncodings; + static const FloatData s_Scores; + static const FloatData s_Anchors; +}; + +struct RegularNmsExpectedResults +{ + static const FloatData s_DetectionBoxes; + static const FloatData s_DetectionScores; + static const FloatData s_DetectionClasses; + static const FloatData s_NumDetections; +}; + +struct FastNmsExpectedResults +{ + static const FloatData s_DetectionBoxes; + static const FloatData s_DetectionScores; + static const FloatData s_DetectionClasses; + static const FloatData s_NumDetections; +}; + +const armnn::TensorShape TestData::s_BoxEncodingsShape = { 1, 6, 4 }; +const armnn::TensorShape TestData::s_ScoresShape = { 1, 6, 3 }; +const armnn::TensorShape TestData::s_AnchorsShape = { 6, 4 }; + +const QuantData TestData::s_BoxEncodingsQuantData = { 1.00f, 1 }; +const QuantData TestData::s_ScoresQuantData = { 0.01f, 0 }; +const QuantData TestData::s_AnchorsQuantData = { 0.50f, 0 }; + +const FloatData TestData::s_BoxEncodings = +{ + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f +}; + +const FloatData TestData::s_Scores = +{ + 0.0f, 0.90f, 0.80f, + 0.0f, 0.75f, 0.72f, + 0.0f, 0.60f, 0.50f, + 0.0f, 0.93f, 0.95f, + 0.0f, 0.50f, 0.40f, + 0.0f, 0.30f, 0.20f +}; + +const FloatData TestData::s_Anchors = +{ + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 0.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 10.5f, 1.0f, 1.0f, + 0.5f, 100.5f, 1.0f, 1.0f +}; + +const 
FloatData RegularNmsExpectedResults::s_DetectionBoxes = +{ + 0.0f, 10.0f, 1.0f, 11.0f, + 0.0f, 10.0f, 1.0f, 11.0f, + 0.0f, 0.0f, 0.0f, 0.0f +}; + +const FloatData RegularNmsExpectedResults::s_DetectionScores = +{ + 0.95f, 0.93f, 0.0f +}; + +const FloatData RegularNmsExpectedResults::s_DetectionClasses = +{ + 1.0f, 0.0f, 0.0f +}; + +const FloatData RegularNmsExpectedResults::s_NumDetections = { 2.0f }; + +const FloatData FastNmsExpectedResults::s_DetectionBoxes = +{ + 0.0f, 10.0f, 1.0f, 11.0f, + 0.0f, 0.0f, 1.0f, 1.0f, + 0.0f, 100.0f, 1.0f, 101.0f +}; + +const FloatData FastNmsExpectedResults::s_DetectionScores = +{ + 0.95f, 0.9f, 0.3f +}; + +const FloatData FastNmsExpectedResults::s_DetectionClasses = +{ + 1.0f, 0.0f, 0.0f +}; + +const FloatData FastNmsExpectedResults::s_NumDetections = { 3.0f }; + +} // anonymous namespace + +template<typename FactoryType, + armnn::DataType ArmnnType, + typename T = armnn::ResolveType<ArmnnType>> void DetectionPostProcessImpl(const armnn::TensorInfo& boxEncodingsInfo, const armnn::TensorInfo& scoresInfo, const armnn::TensorInfo& anchorsInfo, @@ -110,254 +227,140 @@ void DetectionPostProcessImpl(const armnn::TensorInfo& boxEncodingsInfo, BOOST_TEST(CompareTensors(numDetectionsResult.output, numDetectionsResult.outputExpected)); } -inline void QuantizeData(uint8_t* quant, const float* dequant, const armnn::TensorInfo& info) +template<armnn::DataType QuantizedType, typename RawType = armnn::ResolveType<QuantizedType>> +void QuantizeData(RawType* quant, const float* dequant, const armnn::TensorInfo& info) { for (size_t i = 0; i < info.GetNumElements(); i++) { - quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + quant[i] = armnn::Quantize<RawType>( + dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); } } -template <typename FactoryType> +template<typename FactoryType> void DetectionPostProcessRegularNmsFloatTest() { - armnn::TensorInfo boxEncodingsInfo({ 1, 
6, 4 }, armnn::DataType::Float32); - armnn::TensorInfo scoresInfo({ 1, 6, 3}, armnn::DataType::Float32); - armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32); - - std::vector<float> boxEncodingsData({ - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, -1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f - }); - std::vector<float> scoresData({ - 0.0f, 0.9f, 0.8f, - 0.0f, 0.75f, 0.72f, - 0.0f, 0.6f, 0.5f, - 0.0f, 0.93f, 0.95f, - 0.0f, 0.5f, 0.4f, - 0.0f, 0.3f, 0.2f - }); - std::vector<float> anchorsData({ - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 100.5f, 1.0f, 1.0f - }); - - std::vector<float> expectedDetectionBoxes({ - 0.0f, 10.0f, 1.0f, 11.0f, - 0.0f, 10.0f, 1.0f, 11.0f, - 0.0f, 0.0f, 0.0f, 0.0f - }); - std::vector<float> expectedDetectionScores({ 0.95f, 0.93f, 0.0f }); - std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f }); - std::vector<float> expectedNumDetections({ 2.0f }); - - return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>(boxEncodingsInfo, - scoresInfo, - anchorsInfo, - boxEncodingsData, - scoresData, - anchorsData, - expectedDetectionBoxes, - expectedDetectionClasses, - expectedDetectionScores, - expectedNumDetections, - true); + return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>( + armnn::TensorInfo(TestData::s_BoxEncodingsShape, armnn::DataType::Float32), + armnn::TensorInfo(TestData::s_ScoresShape, armnn::DataType::Float32), + armnn::TensorInfo(TestData::s_AnchorsShape, armnn::DataType::Float32), + TestData::s_BoxEncodings, + TestData::s_Scores, + TestData::s_Anchors, + RegularNmsExpectedResults::s_DetectionBoxes, + RegularNmsExpectedResults::s_DetectionClasses, + RegularNmsExpectedResults::s_DetectionScores, + RegularNmsExpectedResults::s_NumDetections, + true); } -template <typename FactoryType> -void 
DetectionPostProcessRegularNmsUint8Test() +template<typename FactoryType, + armnn::DataType QuantizedType, + typename RawType = armnn::ResolveType<QuantizedType>> +void DetectionPostProcessRegularNmsQuantizedTest() { - armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::QuantisedAsymm8); - - boxEncodingsInfo.SetQuantizationScale(1.0f); - boxEncodingsInfo.SetQuantizationOffset(1); - scoresInfo.SetQuantizationScale(0.01f); - scoresInfo.SetQuantizationOffset(0); - anchorsInfo.SetQuantizationScale(0.5f); - anchorsInfo.SetQuantizationOffset(0); - - std::vector<float> boxEncodings({ - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, -1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f - }); - std::vector<float> scores({ - 0.0f, 0.9f, 0.8f, - 0.0f, 0.75f, 0.72f, - 0.0f, 0.6f, 0.5f, - 0.0f, 0.93f, 0.95f, - 0.0f, 0.5f, 0.4f, - 0.0f, 0.3f, 0.2f - }); - std::vector<float> anchors({ - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 100.5f, 1.0f, 1.0f - }); - - std::vector<uint8_t> boxEncodingsData(boxEncodings.size(), 0); - std::vector<uint8_t> scoresData(scores.size(), 0); - std::vector<uint8_t> anchorsData(anchors.size(), 0); - QuantizeData(boxEncodingsData.data(), boxEncodings.data(), boxEncodingsInfo); - QuantizeData(scoresData.data(), scores.data(), scoresInfo); - QuantizeData(anchorsData.data(), anchors.data(), anchorsInfo); - - std::vector<float> expectedDetectionBoxes({ - 0.0f, 10.0f, 1.0f, 11.0f, - 0.0f, 10.0f, 1.0f, 11.0f, - 0.0f, 0.0f, 0.0f, 0.0f - }); - std::vector<float> expectedDetectionScores({ 0.95f, 0.93f, 0.0f }); - std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f }); - std::vector<float> expectedNumDetections({ 2.0f }); - - return 
DetectionPostProcessImpl<FactoryType, armnn::DataType::QuantisedAsymm8>(boxEncodingsInfo, - scoresInfo, - anchorsInfo, - boxEncodingsData, - scoresData, - anchorsData, - expectedDetectionBoxes, - expectedDetectionClasses, - expectedDetectionScores, - expectedNumDetections, - true); + armnn::TensorInfo boxEncodingsInfo(TestData::s_BoxEncodingsShape, QuantizedType); + armnn::TensorInfo scoresInfo(TestData::s_ScoresShape, QuantizedType); + armnn::TensorInfo anchorsInfo(TestData::s_AnchorsShape, QuantizedType); + + boxEncodingsInfo.SetQuantizationScale(TestData::s_BoxEncodingsQuantData.first); + boxEncodingsInfo.SetQuantizationOffset(TestData::s_BoxEncodingsQuantData.second); + + scoresInfo.SetQuantizationScale(TestData::s_ScoresQuantData.first); + scoresInfo.SetQuantizationOffset(TestData::s_ScoresQuantData.second); + + anchorsInfo.SetQuantizationScale(TestData::s_AnchorsQuantData.first); + anchorsInfo.SetQuantizationOffset(TestData::s_AnchorsQuantData.second); + + std::vector<RawType> boxEncodingsData(TestData::s_BoxEncodingsShape.GetNumElements()); + QuantizeData<QuantizedType>(boxEncodingsData.data(), + TestData::s_BoxEncodings.data(), + boxEncodingsInfo); + + std::vector<RawType> scoresData(TestData::s_ScoresShape.GetNumElements()); + QuantizeData<QuantizedType>(scoresData.data(), + TestData::s_Scores.data(), + scoresInfo); + + std::vector<RawType> anchorsData(TestData::s_AnchorsShape.GetNumElements()); + QuantizeData<QuantizedType>(anchorsData.data(), + TestData::s_Anchors.data(), + anchorsInfo); + + return DetectionPostProcessImpl<FactoryType, QuantizedType>( + boxEncodingsInfo, + scoresInfo, + anchorsInfo, + boxEncodingsData, + scoresData, + anchorsData, + RegularNmsExpectedResults::s_DetectionBoxes, + RegularNmsExpectedResults::s_DetectionClasses, + RegularNmsExpectedResults::s_DetectionScores, + RegularNmsExpectedResults::s_NumDetections, + true); } -template <typename FactoryType> +template<typename FactoryType> void 
DetectionPostProcessFastNmsFloatTest() { - armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::Float32); - armnn::TensorInfo scoresInfo({ 1, 6, 3}, armnn::DataType::Float32); - armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::Float32); - - std::vector<float> boxEncodingsData({ - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, -1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f - }); - std::vector<float> scoresData({ - 0.0f, 0.9f, 0.8f, - 0.0f, 0.75f, 0.72f, - 0.0f, 0.6f, 0.5f, - 0.0f, 0.93f, 0.95f, - 0.0f, 0.5f, 0.4f, - 0.0f, 0.3f, 0.2f - }); - std::vector<float> anchorsData({ - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 100.5f, 1.0f, 1.0f - }); - - std::vector<float> expectedDetectionBoxes({ - 0.0f, 10.0f, 1.0f, 11.0f, - 0.0f, 0.0f, 1.0f, 1.0f, - 0.0f, 100.0f, 1.0f, 101.0f - }); - std::vector<float> expectedDetectionScores({ 0.95f, 0.9f, 0.3f }); - std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f }); - std::vector<float> expectedNumDetections({ 3.0f }); - - return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>(boxEncodingsInfo, - scoresInfo, - anchorsInfo, - boxEncodingsData, - scoresData, - anchorsData, - expectedDetectionBoxes, - expectedDetectionClasses, - expectedDetectionScores, - expectedNumDetections, - false); + return DetectionPostProcessImpl<FactoryType, armnn::DataType::Float32>( + armnn::TensorInfo(TestData::s_BoxEncodingsShape, armnn::DataType::Float32), + armnn::TensorInfo(TestData::s_ScoresShape, armnn::DataType::Float32), + armnn::TensorInfo(TestData::s_AnchorsShape, armnn::DataType::Float32), + TestData::s_BoxEncodings, + TestData::s_Scores, + TestData::s_Anchors, + FastNmsExpectedResults::s_DetectionBoxes, + FastNmsExpectedResults::s_DetectionClasses, + FastNmsExpectedResults::s_DetectionScores, + FastNmsExpectedResults::s_NumDetections, + false); } 
-template <typename FactoryType> -void DetectionPostProcessFastNmsUint8Test() +template<typename FactoryType, + armnn::DataType QuantizedType, + typename RawType = armnn::ResolveType<QuantizedType>> +void DetectionPostProcessFastNmsQuantizedTest() { - armnn::TensorInfo boxEncodingsInfo({ 1, 6, 4 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo scoresInfo({ 1, 6, 3 }, armnn::DataType::QuantisedAsymm8); - armnn::TensorInfo anchorsInfo({ 6, 4 }, armnn::DataType::QuantisedAsymm8); - - boxEncodingsInfo.SetQuantizationScale(1.0f); - boxEncodingsInfo.SetQuantizationOffset(1); - scoresInfo.SetQuantizationScale(0.01f); - scoresInfo.SetQuantizationOffset(0); - anchorsInfo.SetQuantizationScale(0.5f); - anchorsInfo.SetQuantizationOffset(0); - - std::vector<float> boxEncodings({ - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, -1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f - }); - std::vector<float> scores({ - 0.0f, 0.9f, 0.8f, - 0.0f, 0.75f, 0.72f, - 0.0f, 0.6f, 0.5f, - 0.0f, 0.93f, 0.95f, - 0.0f, 0.5f, 0.4f, - 0.0f, 0.3f, 0.2f - }); - std::vector<float> anchors({ - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 0.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 10.5f, 1.0f, 1.0f, - 0.5f, 100.5f, 1.0f, 1.0f - }); - - std::vector<uint8_t> boxEncodingsData(boxEncodings.size(), 0); - std::vector<uint8_t> scoresData(scores.size(), 0); - std::vector<uint8_t> anchorsData(anchors.size(), 0); - QuantizeData(boxEncodingsData.data(), boxEncodings.data(), boxEncodingsInfo); - QuantizeData(scoresData.data(), scores.data(), scoresInfo); - QuantizeData(anchorsData.data(), anchors.data(), anchorsInfo); - - std::vector<float> expectedDetectionBoxes({ - 0.0f, 10.0f, 1.0f, 11.0f, - 0.0f, 0.0f, 1.0f, 1.0f, - 0.0f, 100.0f, 1.0f, 101.0f - }); - std::vector<float> expectedDetectionScores({ 0.95f, 0.9f, 0.3f }); - std::vector<float> expectedDetectionClasses({ 1.0f, 0.0f, 0.0f }); - std::vector<float> 
expectedNumDetections({ 3.0f }); - - return DetectionPostProcessImpl<FactoryType, armnn::DataType::QuantisedAsymm8>(boxEncodingsInfo, - scoresInfo, - anchorsInfo, - boxEncodingsData, - scoresData, - anchorsData, - expectedDetectionBoxes, - expectedDetectionClasses, - expectedDetectionScores, - expectedNumDetections, - false); -} + armnn::TensorInfo boxEncodingsInfo(TestData::s_BoxEncodingsShape, QuantizedType); + armnn::TensorInfo scoresInfo(TestData::s_ScoresShape, QuantizedType); + armnn::TensorInfo anchorsInfo(TestData::s_AnchorsShape, QuantizedType); + + boxEncodingsInfo.SetQuantizationScale(TestData::s_BoxEncodingsQuantData.first); + boxEncodingsInfo.SetQuantizationOffset(TestData::s_BoxEncodingsQuantData.second); + + scoresInfo.SetQuantizationScale(TestData::s_ScoresQuantData.first); + scoresInfo.SetQuantizationOffset(TestData::s_ScoresQuantData.second); + + anchorsInfo.SetQuantizationScale(TestData::s_AnchorsQuantData.first); + anchorsInfo.SetQuantizationOffset(TestData::s_AnchorsQuantData.second); + + std::vector<RawType> boxEncodingsData(TestData::s_BoxEncodingsShape.GetNumElements()); + QuantizeData<QuantizedType>(boxEncodingsData.data(), + TestData::s_BoxEncodings.data(), + boxEncodingsInfo); + + std::vector<RawType> scoresData(TestData::s_ScoresShape.GetNumElements()); + QuantizeData<QuantizedType>(scoresData.data(), + TestData::s_Scores.data(), + scoresInfo); + + std::vector<RawType> anchorsData(TestData::s_AnchorsShape.GetNumElements()); + QuantizeData<QuantizedType>(anchorsData.data(), + TestData::s_Anchors.data(), + anchorsInfo); + + return DetectionPostProcessImpl<FactoryType, QuantizedType>( + boxEncodingsInfo, + scoresInfo, + anchorsInfo, + boxEncodingsData, + scoresData, + anchorsData, + FastNmsExpectedResults::s_DetectionBoxes, + FastNmsExpectedResults::s_DetectionClasses, + FastNmsExpectedResults::s_DetectionScores, + FastNmsExpectedResults::s_NumDetections, + false); +}
\ No newline at end of file diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp index d103f56c23..5e247b2f4f 100644 --- a/src/backends/reference/RefWorkloadFactory.cpp +++ b/src/backends/reference/RefWorkloadFactory.cpp @@ -179,16 +179,7 @@ std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDetectionPostProcess( const armnn::DetectionPostProcessQueueDescriptor& descriptor, const armnn::WorkloadInfo& info) const { - const DataType dataType = info.m_InputTensorInfos[0].GetDataType(); - switch (dataType) - { - case DataType::Float32: - return std::make_unique<RefDetectionPostProcessFloat32Workload>(descriptor, info); - case DataType::QuantisedAsymm8: - return std::make_unique<RefDetectionPostProcessUint8Workload>(descriptor, info); - default: - return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); - } + return std::make_unique<RefDetectionPostProcessWorkload>(descriptor, info); } std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateNormalization( diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk index 81b6de18e4..edf1431cfd 100644 --- a/src/backends/reference/backend.mk +++ b/src/backends/reference/backend.mk @@ -37,8 +37,7 @@ BACKEND_SOURCES := \ workloads/RefDebugWorkload.cpp \ workloads/RefDepthwiseConvolution2dWorkload.cpp \ workloads/RefDequantizeWorkload.cpp \ - workloads/RefDetectionPostProcessFloat32Workload.cpp \ - workloads/RefDetectionPostProcessUint8Workload.cpp \ + workloads/RefDetectionPostProcessWorkload.cpp \ workloads/RefElementwiseWorkload.cpp \ workloads/RefFakeQuantizationFloat32Workload.cpp \ workloads/RefFloorWorkload.cpp \ diff --git a/src/backends/reference/test/RefDetectionPostProcessTests.cpp b/src/backends/reference/test/RefDetectionPostProcessTests.cpp index a9faff70b1..fab6e00bad 100644 --- a/src/backends/reference/test/RefDetectionPostProcessTests.cpp 
+++ b/src/backends/reference/test/RefDetectionPostProcessTests.cpp @@ -3,7 +3,7 @@ // SPDX-License-Identifier: MIT // -#include "reference/workloads/DetectionPostProcess.cpp" +#include <reference/workloads/DetectionPostProcess.hpp> #include <armnn/Descriptors.hpp> #include <armnn/Types.hpp> @@ -12,13 +12,12 @@ BOOST_AUTO_TEST_SUITE(RefDetectionPostProcess) - BOOST_AUTO_TEST_CASE(TopKSortTest) { unsigned int k = 3; unsigned int indices[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; float values[8] = { 0, 7, 6, 5, 4, 3, 2, 500 }; - TopKSort(k, indices, values, 8); + armnn::TopKSort(k, indices, values, 8); BOOST_TEST(indices[0] == 7); BOOST_TEST(indices[1] == 1); BOOST_TEST(indices[2] == 2); @@ -29,7 +28,7 @@ BOOST_AUTO_TEST_CASE(FullTopKSortTest) unsigned int k = 8; unsigned int indices[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; float values[8] = { 0, 7, 6, 5, 4, 3, 2, 500 }; - TopKSort(k, indices, values, 8); + armnn::TopKSort(k, indices, values, 8); BOOST_TEST(indices[0] == 7); BOOST_TEST(indices[1] == 1); BOOST_TEST(indices[2] == 2); @@ -44,7 +43,7 @@ BOOST_AUTO_TEST_CASE(IouTest) { float boxI[4] = { 0.0f, 0.0f, 10.0f, 10.0f }; float boxJ[4] = { 1.0f, 1.0f, 11.0f, 11.0f }; - float iou = IntersectionOverUnion(boxI, boxJ); + float iou = armnn::IntersectionOverUnion(boxI, boxJ); BOOST_TEST(iou == 0.68, boost::test_tools::tolerance(0.001)); } @@ -61,14 +60,17 @@ BOOST_AUTO_TEST_CASE(NmsFunction) std::vector<float> scores({ 0.9f, 0.75f, 0.6f, 0.93f, 0.5f, 0.3f }); - std::vector<unsigned int> result = NonMaxSuppression(6, boxCorners, scores, 0.0, 3, 0.5); + std::vector<unsigned int> result = + armnn::NonMaxSuppression(6, boxCorners, scores, 0.0, 3, 0.5); + BOOST_TEST(result.size() == 3); BOOST_TEST(result[0] == 3); BOOST_TEST(result[1] == 0); BOOST_TEST(result[2] == 5); } -void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>& expectedDetectionBoxes, +void DetectionPostProcessTestImpl(bool useRegularNms, + const std::vector<float>& expectedDetectionBoxes, const 
std::vector<float>& expectedDetectionClasses, const std::vector<float>& expectedDetectionScores, const std::vector<float>& expectedNumDetections) @@ -103,6 +105,7 @@ void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>& 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }); + std::vector<float> scores({ 0.0f, 0.9f, 0.8f, 0.0f, 0.75f, 0.72f, @@ -111,6 +114,7 @@ void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>& 0.0f, 0.5f, 0.4f, 0.0f, 0.3f, 0.2f }); + std::vector<float> anchors({ 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f, 1.0f, 1.0f, @@ -120,22 +124,50 @@ void DetectionPostProcessTestImpl(bool useRegularNms, const std::vector<float>& 0.5f, 100.5f, 1.0f, 1.0f }); + auto boxEncodingsDecoder = armnn::MakeDecoder<float>(boxEncodingsInfo, boxEncodings.data()); + auto scoresDecoder = armnn::MakeDecoder<float>(scoresInfo, scores.data()); + auto anchorsDecoder = armnn::MakeDecoder<float>(anchorsInfo, anchors.data()); + std::vector<float> detectionBoxes(detectionBoxesInfo.GetNumElements()); std::vector<float> detectionScores(detectionScoresInfo.GetNumElements()); std::vector<float> detectionClasses(detectionClassesInfo.GetNumElements()); std::vector<float> numDetections(1); - armnn::DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, - detectionBoxesInfo, detectionClassesInfo, - detectionScoresInfo, numDetectionInfo, desc, - boxEncodings.data(), scores.data(), anchors.data(), - detectionBoxes.data(), detectionClasses.data(), - detectionScores.data(), numDetections.data()); - - BOOST_TEST(detectionBoxes == expectedDetectionBoxes); - BOOST_TEST(detectionScores == expectedDetectionScores); - BOOST_TEST(detectionClasses == expectedDetectionClasses); - BOOST_TEST(numDetections == expectedNumDetections); + armnn::DetectionPostProcess(boxEncodingsInfo, + scoresInfo, + anchorsInfo, + detectionBoxesInfo, + detectionClassesInfo, + detectionScoresInfo, + numDetectionInfo, + desc, + *boxEncodingsDecoder, + *scoresDecoder, + 
*anchorsDecoder, + detectionBoxes.data(), + detectionClasses.data(), + detectionScores.data(), + numDetections.data()); + + BOOST_CHECK_EQUAL_COLLECTIONS(detectionBoxes.begin(), + detectionBoxes.end(), + expectedDetectionBoxes.begin(), + expectedDetectionBoxes.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(detectionScores.begin(), + detectionScores.end(), + expectedDetectionScores.begin(), + expectedDetectionScores.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(detectionClasses.begin(), + detectionClasses.end(), + expectedDetectionClasses.begin(), + expectedDetectionClasses.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(numDetections.begin(), + numDetections.end(), + expectedNumDetections.begin(), + expectedNumDetections.end()); } BOOST_AUTO_TEST_CASE(RegularNmsDetectionPostProcess) diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp index b2f71a8920..f54a8d067c 100644 --- a/src/backends/reference/test/RefLayerTests.cpp +++ b/src/backends/reference/test/RefLayerTests.cpp @@ -624,11 +624,23 @@ BOOST_AUTO_TEST_CASE(DetectionPostProcessFastNmsFloat) } BOOST_AUTO_TEST_CASE(DetectionPostProcessRegularNmsUint8) { - DetectionPostProcessRegularNmsUint8Test<armnn::RefWorkloadFactory>(); + DetectionPostProcessRegularNmsQuantizedTest< + armnn::RefWorkloadFactory, armnn::DataType::QuantisedAsymm8>(); } BOOST_AUTO_TEST_CASE(DetectionPostProcessFastNmsUint8) { - DetectionPostProcessFastNmsUint8Test<armnn::RefWorkloadFactory>(); + DetectionPostProcessFastNmsQuantizedTest< + armnn::RefWorkloadFactory, armnn::DataType::QuantisedAsymm8>(); +} +BOOST_AUTO_TEST_CASE(DetectionPostProcessRegularNmsInt16) +{ + DetectionPostProcessRegularNmsQuantizedTest< + armnn::RefWorkloadFactory, armnn::DataType::QuantisedSymm16>(); +} +BOOST_AUTO_TEST_CASE(DetectionPostProcessFastNmsInt16) +{ + DetectionPostProcessFastNmsQuantizedTest< + armnn::RefWorkloadFactory, armnn::DataType::QuantisedSymm16>(); } // Dequantize diff --git 
a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt index cdca22da31..25d4b28366 100644 --- a/src/backends/reference/workloads/CMakeLists.txt +++ b/src/backends/reference/workloads/CMakeLists.txt @@ -60,10 +60,8 @@ list(APPEND armnnRefBackendWorkloads_sources RefDepthwiseConvolution2dWorkload.hpp RefDequantizeWorkload.cpp RefDequantizeWorkload.hpp - RefDetectionPostProcessUint8Workload.cpp - RefDetectionPostProcessUint8Workload.hpp - RefDetectionPostProcessFloat32Workload.cpp - RefDetectionPostProcessFloat32Workload.hpp + RefDetectionPostProcessWorkload.cpp + RefDetectionPostProcessWorkload.hpp RefFakeQuantizationFloat32Workload.cpp RefFakeQuantizationFloat32Workload.hpp RefFloorWorkload.cpp diff --git a/src/backends/reference/workloads/DetectionPostProcess.cpp b/src/backends/reference/workloads/DetectionPostProcess.cpp index d3790f22ab..d475dd8ac0 100644 --- a/src/backends/reference/workloads/DetectionPostProcess.cpp +++ b/src/backends/reference/workloads/DetectionPostProcess.cpp @@ -13,7 +13,7 @@ #include <algorithm> #include <numeric> -namespace +namespace armnn { std::vector<unsigned int> GenerateRangeK(unsigned int k) @@ -48,9 +48,12 @@ float IntersectionOverUnion(const float* boxI, const float* boxJ) return areaIntersection / areaUnion; } -std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, const std::vector<float>& boxCorners, - const std::vector<float>& scores, float nmsScoreThreshold, - unsigned int maxDetection, float nmsIouThreshold) +std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, + const std::vector<float>& boxCorners, + const std::vector<float>& scores, + float nmsScoreThreshold, + unsigned int maxDetection, + float nmsIouThreshold) { // Select boxes that have scores above a given threshold. std::vector<float> scoresAboveThreshold; @@ -67,7 +70,7 @@ std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, const std::ve // Sort the indices based on scores. 
unsigned int numAboveThreshold = boost::numeric_cast<unsigned int>(scoresAboveThreshold.size()); std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold); - TopKSort(numAboveThreshold,sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold); + TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold); // Number of output cannot be more than max detections specified in the option. unsigned int numOutput = std::min(maxDetection, numAboveThreshold); @@ -98,10 +101,17 @@ std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, const std::ve return outputIndices; } -void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector<float>& boxCorners, - const std::vector<unsigned int>& outputIndices, const std::vector<unsigned int>& selectedBoxes, - const std::vector<unsigned int>& selectedClasses, const std::vector<float>& selectedScores, - float* detectionBoxes, float* detectionScores, float* detectionClasses, float* numDetections) +void AllocateOutputData(unsigned int numOutput, + unsigned int numSelected, + const std::vector<float>& boxCorners, + const std::vector<unsigned int>& outputIndices, + const std::vector<unsigned int>& selectedBoxes, + const std::vector<unsigned int>& selectedClasses, + const std::vector<float>& selectedScores, + float* detectionBoxes, + float* detectionScores, + float* detectionClasses, + float* numDetections) { for (unsigned int i = 0; i < numOutput; ++i) { @@ -129,11 +139,6 @@ void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const numDetections[0] = boost::numeric_cast<float>(numSelected); } -} // anonymous namespace - -namespace armnn -{ - void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, const TensorInfo& scoresInfo, const TensorInfo& anchorsInfo, @@ -142,9 +147,9 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, const TensorInfo& detectionScoresInfo, const TensorInfo& 
numDetectionsInfo, const DetectionPostProcessDescriptor& desc, - const float* boxEncodings, - const float* scores, - const float* anchors, + Decoder<float>& boxEncodings, + Decoder<float>& scores, + Decoder<float>& anchors, float* detectionBoxes, float* detectionClasses, float* detectionScores, @@ -153,17 +158,51 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format, // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax) std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements()); - unsigned int numBoxes = boxEncodingsInfo.GetShape()[1]; + + const unsigned int numBoxes = boxEncodingsInfo.GetShape()[1]; + const unsigned int numScores = scoresInfo.GetNumElements(); + for (unsigned int i = 0; i < numBoxes; ++i) { + // Y + float boxEncodingY = boxEncodings.Get(); + float anchorY = anchors.Get(); + + ++boxEncodings; + ++anchors; + + // X + float boxEncodingX = boxEncodings.Get(); + float anchorX = anchors.Get(); + + ++boxEncodings; + ++anchors; + + // H + float boxEncodingH = boxEncodings.Get(); + float anchorH = anchors.Get(); + + ++boxEncodings; + ++anchors; + + // W + float boxEncodingW = boxEncodings.Get(); + float anchorW = anchors.Get(); + + ++boxEncodings; + ++anchors; + + float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY; + float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX; + + float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH; + float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW; + unsigned int indexY = i * 4; unsigned int indexX = indexY + 1; unsigned int indexH = indexX + 1; unsigned int indexW = indexH + 1; - float yCentre = boxEncodings[indexY] / desc.m_ScaleY * anchors[indexH] + anchors[indexY]; - float xCentre = boxEncodings[indexX] / desc.m_ScaleX * anchors[indexW] + anchors[indexX]; - float halfH = 0.5f * expf(boxEncodings[indexH] / 
desc.m_ScaleH) * anchors[indexH]; - float halfW = 0.5f * expf(boxEncodings[indexW] / desc.m_ScaleW) * anchors[indexW]; + // ymin boxCorners[indexY] = yCentre - halfH; // xmin @@ -179,14 +218,29 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, unsigned int numClassesWithBg = desc.m_NumClasses + 1; + // Decode scores + std::vector<float> decodedScores; + decodedScores.reserve(numScores); + + for (unsigned int i = 0u; i < numScores; ++i) + { + decodedScores.emplace_back(scores.Get()); + ++scores; + } + // Perform Non Max Suppression. if (desc.m_UseRegularNms) { // Perform Regular NMS. // For each class, perform NMS and select max detection numbers of the highest score across all classes. std::vector<float> classScores(numBoxes); - std::vector<unsigned int>selectedBoxesAfterNms; + + std::vector<unsigned int> selectedBoxesAfterNms; + selectedBoxesAfterNms.reserve(numBoxes); + + std::vector<float> selectedScoresAfterNms; + selectedScoresAfterNms.reserve(numScores); + + std::vector<unsigned int> selectedClasses; for (unsigned int c = 0; c < desc.m_NumClasses; ++c) @@ -194,9 +248,11 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, // For each boxes, get scores of the boxes for the class c. for (unsigned int i = 0; i < numBoxes; ++i) { - classScores[i] = scores[i * numClassesWithBg + c + 1]; + classScores[i] = decodedScores[i * numClassesWithBg + c + 1]; } - std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, classScores, + std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, + boxCorners, + classScores, desc.m_NmsScoreThreshold, desc.m_DetectionsPerClass, desc.m_NmsIouThreshold); @@ -237,11 +293,12 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, // Get the max scores of the box. 
std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses); - TopKSort(numClassesPerBox, maxScoreIndices.data(), scores + scoreIndex, desc.m_NumClasses); + TopKSort(numClassesPerBox, maxScoreIndices.data(), + decodedScores.data() + scoreIndex, desc.m_NumClasses); for (unsigned int i = 0; i < numClassesPerBox; ++i) { - maxScores.push_back(scores[scoreIndex + maxScoreIndices[i]]); + maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]); maxScoreClasses.push_back(maxScoreIndices[i]); boxIndices.push_back(box); } diff --git a/src/backends/reference/workloads/DetectionPostProcess.hpp b/src/backends/reference/workloads/DetectionPostProcess.hpp index 06e9e15781..8700a53317 100644 --- a/src/backends/reference/workloads/DetectionPostProcess.hpp +++ b/src/backends/reference/workloads/DetectionPostProcess.hpp @@ -7,6 +7,10 @@ #include "armnn/Tensor.hpp" #include "armnn/Descriptors.hpp" +#include "Decoders.hpp" + +#include <vector> + namespace armnn { @@ -18,12 +22,26 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, const TensorInfo& detectionScoresInfo, const TensorInfo& numDetectionsInfo, const DetectionPostProcessDescriptor& desc, - const float* boxEncodings, - const float* scores, - const float* anchors, + Decoder<float>& boxEncodings, + Decoder<float>& scores, + Decoder<float>& anchors, float* detectionBoxes, float* detectionClasses, float* detectionScores, float* numDetections); +void TopKSort(unsigned int k, + unsigned int* indices, + const float* values, + unsigned int numElement); + +float IntersectionOverUnion(const float* boxI, const float* boxJ); + +std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes, + const std::vector<float>& boxCorners, + const std::vector<float>& scores, + float nmsScoreThreshold, + unsigned int maxDetection, + float nmsIouThreshold); + } // namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp 
b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp deleted file mode 100644 index ddab046f9c..0000000000 --- a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefDetectionPostProcessFloat32Workload.hpp" - -#include "DetectionPostProcess.hpp" -#include "Profiling.hpp" -#include "RefWorkloadUtils.hpp" - -namespace armnn -{ - -RefDetectionPostProcessFloat32Workload::RefDetectionPostProcessFloat32Workload( - const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload<DetectionPostProcessQueueDescriptor>(descriptor, info), - m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {} - -void RefDetectionPostProcessFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute"); - - const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); - const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); - const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); - - const float* boxEncodings = GetInputTensorDataFloat(0, m_Data); - const float* scores = GetInputTensorDataFloat(1, m_Data); - const float* anchors = m_Anchors->GetConstTensor<float>(); - - float* detectionBoxes = GetOutputTensorData<float>(0, m_Data); - float* detectionClasses = GetOutputTensorData<float>(1, m_Data); - float* detectionScores = GetOutputTensorData<float>(2, m_Data); - float* numDetections = GetOutputTensorData<float>(3, m_Data); - - 
DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, - detectionBoxesInfo, detectionClassesInfo, - detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, - boxEncodings, scores, anchors, detectionBoxes, - detectionClasses, detectionScores, numDetections); -} - -} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp deleted file mode 100644 index ccdaf87c9a..0000000000 --- a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefDetectionPostProcessUint8Workload.hpp" - -#include "DetectionPostProcess.hpp" -#include "Profiling.hpp" -#include "RefWorkloadUtils.hpp" - -namespace armnn -{ - -RefDetectionPostProcessUint8Workload::RefDetectionPostProcessUint8Workload( - const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8ToFloat32Workload<DetectionPostProcessQueueDescriptor>(descriptor, info), - m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {} - -void RefDetectionPostProcessUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute"); - - const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); - const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); - const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); - - const uint8_t* boxEncodingsData = GetInputTensorDataU8(0, m_Data); - const uint8_t* 
scoresData = GetInputTensorDataU8(1, m_Data); - const uint8_t* anchorsData = m_Anchors->GetConstTensor<uint8_t>(); - - auto boxEncodings = Dequantize(boxEncodingsData, boxEncodingsInfo); - auto scores = Dequantize(scoresData, scoresInfo); - auto anchors = Dequantize(anchorsData, anchorsInfo); - - float* detectionBoxes = GetOutputTensorData<float>(0, m_Data); - float* detectionClasses = GetOutputTensorData<float>(1, m_Data); - float* detectionScores = GetOutputTensorData<float>(2, m_Data); - float* numDetections = GetOutputTensorData<float>(3, m_Data); - - DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, - detectionBoxesInfo, detectionClassesInfo, - detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, - boxEncodings.data(), scores.data(), anchors.data(), - detectionBoxes, detectionClasses, detectionScores, numDetections); -} - -} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp deleted file mode 100644 index 91590f57bd..0000000000 --- a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include <backendsCommon/Workload.hpp> -#include <backendsCommon/WorkloadData.hpp> - -namespace armnn -{ - -class RefDetectionPostProcessUint8Workload : public Uint8ToFloat32Workload<DetectionPostProcessQueueDescriptor> -{ -public: - explicit RefDetectionPostProcessUint8Workload(const DetectionPostProcessQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr<ScopedCpuTensorHandle> m_Anchors; -}; - -} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp new file mode 100644 index 0000000000..db24cc53e4 --- /dev/null +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefDetectionPostProcessWorkload.hpp" + +#include "Decoders.hpp" +#include "DetectionPostProcess.hpp" +#include "Profiling.hpp" +#include "RefWorkloadUtils.hpp" + +namespace armnn +{ + +RefDetectionPostProcessWorkload::RefDetectionPostProcessWorkload( + const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload<DetectionPostProcessQueueDescriptor>(descriptor, info), + m_Anchors(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Anchors))) {} + +void RefDetectionPostProcessWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessWorkload_Execute"); + + const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); + + const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); + const TensorInfo& 
detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); + const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); + + auto boxEncodings = MakeDecoder<float>(boxEncodingsInfo, m_Data.m_Inputs[0]->Map()); + auto scores = MakeDecoder<float>(scoresInfo, m_Data.m_Inputs[1]->Map()); + auto anchors = MakeDecoder<float>(anchorsInfo, m_Anchors->Map(false)); + + float* detectionBoxes = GetOutputTensorData<float>(0, m_Data); + float* detectionClasses = GetOutputTensorData<float>(1, m_Data); + float* detectionScores = GetOutputTensorData<float>(2, m_Data); + float* numDetections = GetOutputTensorData<float>(3, m_Data); + + DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, + detectionBoxesInfo, detectionClassesInfo, + detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, + *boxEncodings, *scores, *anchors, detectionBoxes, + detectionClasses, detectionScores, numDetections); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp index 9f2a697ada..799d0c6219 100644 --- a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp @@ -11,11 +11,11 @@ namespace armnn { -class RefDetectionPostProcessFloat32Workload : public Float32Workload<DetectionPostProcessQueueDescriptor> +class RefDetectionPostProcessWorkload : public BaseWorkload<DetectionPostProcessQueueDescriptor> { public: - explicit RefDetectionPostProcessFloat32Workload(const DetectionPostProcessQueueDescriptor& descriptor, - const WorkloadInfo& info); + explicit RefDetectionPostProcessWorkload(const DetectionPostProcessQueueDescriptor& descriptor, + const WorkloadInfo& info); virtual void Execute() const override; private: diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp index 
7ccd4efc54..8d99b69685 100644 --- a/src/backends/reference/workloads/RefWorkloads.hpp +++ b/src/backends/reference/workloads/RefWorkloads.hpp @@ -31,8 +31,7 @@ #include "RefResizeBilinearFloat32Workload.hpp" #include "ResizeBilinear.hpp" #include "RefNormalizationFloat32Workload.hpp" -#include "RefDetectionPostProcessFloat32Workload.hpp" -#include "RefDetectionPostProcessUint8Workload.hpp" +#include "RefDetectionPostProcessWorkload.hpp" #include "RefPooling2dUint8Workload.hpp" #include "BatchNormImpl.hpp" #include "Activation.hpp" |