From bc67cef3e3dc9e7fe9c4331495009eda48c89527 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Thu, 31 Jan 2019 15:31:54 +0000 Subject: IVGCVSW-2557 Ref Workload Implementation for Detection PostProcess * implementation of DetectionPostProcessQueueDescriptor validate * add Uint8ToFloat32Workload * add implementation of Detection PostProcess functionalities * add ref workload implementation for float and uint8 * add layer support for Detection PostProcess in ref * unit tests Change-Id: I650461f49edbb3c533d68ef8700377af51bc3592 --- src/backends/reference/workloads/CMakeLists.txt | 6 + .../reference/workloads/DetectionPostProcess.cpp | 264 +++++++++++++++++++++ .../reference/workloads/DetectionPostProcess.hpp | 29 +++ .../RefDetectionPostProcessFloat32Workload.cpp | 48 ++++ .../RefDetectionPostProcessFloat32Workload.hpp | 25 ++ .../RefDetectionPostProcessUint8Workload.cpp | 52 ++++ .../RefDetectionPostProcessUint8Workload.hpp | 25 ++ src/backends/reference/workloads/RefWorkloads.hpp | 2 + 8 files changed, 451 insertions(+) create mode 100644 src/backends/reference/workloads/DetectionPostProcess.cpp create mode 100644 src/backends/reference/workloads/DetectionPostProcess.hpp create mode 100644 src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp (limited to 'src/backends/reference/workloads') diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt index 57e89fa456..47e42f7fcc 100644 --- a/src/backends/reference/workloads/CMakeLists.txt +++ b/src/backends/reference/workloads/CMakeLists.txt @@ -15,6 +15,8 @@ list(APPEND armnnRefBackendWorkloads_sources ConvImpl.hpp Debug.cpp Debug.hpp + 
DetectionPostProcess.cpp + DetectionPostProcess.hpp ElementwiseFunction.cpp ElementwiseFunction.hpp FullyConnected.cpp @@ -60,6 +62,10 @@ list(APPEND armnnRefBackendWorkloads_sources RefDepthwiseConvolution2dFloat32Workload.hpp RefDepthwiseConvolution2dUint8Workload.cpp RefDepthwiseConvolution2dUint8Workload.hpp + RefDetectionPostProcessUint8Workload.cpp + RefDetectionPostProcessUint8Workload.hpp + RefDetectionPostProcessFloat32Workload.cpp + RefDetectionPostProcessFloat32Workload.hpp RefFakeQuantizationFloat32Workload.cpp RefFakeQuantizationFloat32Workload.hpp RefFloorFloat32Workload.cpp diff --git a/src/backends/reference/workloads/DetectionPostProcess.cpp b/src/backends/reference/workloads/DetectionPostProcess.cpp new file mode 100644 index 0000000000..958de8294b --- /dev/null +++ b/src/backends/reference/workloads/DetectionPostProcess.cpp @@ -0,0 +1,264 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "DetectionPostProcess.hpp" + +#include + +#include + +#include +#include + +namespace +{ + +std::vector GenerateRangeK(unsigned int k) +{ + std::vector range(k); + std::iota(range.begin(), range.end(), 0); + return range; +} + +void TopKSort(unsigned int k, unsigned int* indices, const float* values, unsigned int numElement) +{ + std::partial_sort(indices, indices + k, indices + numElement, + [&values](unsigned int i, unsigned int j) { return values[i] > values[j]; }); +} + +float IntersectionOverUnion(const float* boxI, const float* boxJ) +{ + // Box-corner format: ymin, xmin, ymax, xmax. 
+ const int yMin = 0; + const int xMin = 1; + const int yMax = 2; + const int xMax = 3; + float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]); + float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]); + float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]); + float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]); + float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]); + float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]); + float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) * + std::max(xMaxIntersection - xMinIntersection, 0.0f); + float areaUnion = areaI + areaJ - areaIntersection; + return areaIntersection / areaUnion; +} + +std::vector NonMaxSuppression(unsigned int numBoxes, const std::vector& boxCorners, + const std::vector& scores, float nmsScoreThreshold, + unsigned int maxDetection, float nmsIouThreshold) +{ + // Select boxes that have scores above a given threshold. + std::vector scoresAboveThreshold; + std::vector indicesAboveThreshold; + for (unsigned int i = 0; i < numBoxes; ++i) + { + if (scores[i] >= nmsScoreThreshold) + { + scoresAboveThreshold.push_back(scores[i]); + indicesAboveThreshold.push_back(i); + } + } + + // Sort the indices based on scores. + unsigned int numAboveThreshold = boost::numeric_cast(scoresAboveThreshold.size()); + std::vector sortedIndices = GenerateRangeK(numAboveThreshold); + TopKSort(numAboveThreshold,sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold); + + // Number of output cannot be more than max detections specified in the option. + unsigned int numOutput = std::min(maxDetection, numAboveThreshold); + std::vector outputIndices; + std::vector visited(numAboveThreshold, false); + + // Prune out the boxes with high intersection over union by keeping the box with higher score. 
+ for (unsigned int i = 0; i < numAboveThreshold; ++i) + { + if (outputIndices.size() >= numOutput) + { + break; + } + if (!visited[sortedIndices[i]]) + { + outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]); + } + for (unsigned int j = i + 1; j < numAboveThreshold; ++j) + { + unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4; + unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4; + if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold) + { + visited[sortedIndices[j]] = true; + } + } + } + return outputIndices; +} + +void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector& boxCorners, + const std::vector& outputIndices, const std::vector& selectedBoxes, + const std::vector& selectedClasses, const std::vector& selectedScores, + float* detectionBoxes, float* detectionScores, float* detectionClasses, float* numDetections) +{ + for (unsigned int i = 0; i < numOutput; ++i) + { + unsigned int boxIndex = i * 4; + unsigned int boxConorIndex = selectedBoxes[outputIndices[i]] * 4; + if (i < numSelected) + { + detectionScores[i] = selectedScores[outputIndices[i]]; + detectionClasses[i] = boost::numeric_cast(selectedClasses[outputIndices[i]]); + detectionBoxes[boxIndex] = boxCorners[boxConorIndex]; + detectionBoxes[boxIndex + 1] = boxCorners[boxConorIndex + 1]; + detectionBoxes[boxIndex + 2] = boxCorners[boxConorIndex + 2]; + detectionBoxes[boxIndex + 3] = boxCorners[boxConorIndex + 3]; + } + else + { + detectionScores[i] = 0.0f; + detectionClasses[i] = 0.0f; + detectionBoxes[boxIndex] = 0.0f; + detectionBoxes[boxIndex + 1] = 0.0f; + detectionBoxes[boxIndex + 2] = 0.0f; + detectionBoxes[boxIndex + 3] = 0.0f; + } + } + numDetections[0] = boost::numeric_cast(numOutput); +} + +} // anonymous namespace + +namespace armnn +{ + +void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, + const TensorInfo& scoresInfo, + const TensorInfo& anchorsInfo, + const TensorInfo& 
detectionBoxesInfo, + const TensorInfo& detectionClassesInfo, + const TensorInfo& detectionScoresInfo, + const TensorInfo& numDetectionsInfo, + const DetectionPostProcessDescriptor& desc, + const float* boxEncodings, + const float* scores, + const float* anchors, + float* detectionBoxes, + float* detectionClasses, + float* detectionScores, + float* numDetections) +{ + // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format, + // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax) + std::vector boxCorners(boxEncodingsInfo.GetNumElements()); + unsigned int numBoxes = boxEncodingsInfo.GetShape()[1]; + for (unsigned int i = 0; i < numBoxes; ++i) + { + unsigned int indexY = i * 4; + unsigned int indexX = indexY + 1; + unsigned int indexH = indexX + 1; + unsigned int indexW = indexH + 1; + float yCentre = boxEncodings[indexY] / desc.m_ScaleY * anchors[indexH] + anchors[indexY]; + float xCentre = boxEncodings[indexX] / desc.m_ScaleX * anchors[indexW] + anchors[indexX]; + float halfH = 0.5f * expf(boxEncodings[indexH] / desc.m_ScaleH) * anchors[indexH]; + float halfW = 0.5f * expf(boxEncodings[indexW] / desc.m_ScaleW) * anchors[indexW]; + // ymin + boxCorners[indexY] = yCentre - halfH; + // xmin + boxCorners[indexX] = xCentre - halfW; + // ymax + boxCorners[indexH] = yCentre + halfH; + // xmax + boxCorners[indexW] = xCentre + halfW; + + BOOST_ASSERT(boxCorners[indexY] < boxCorners[indexH]); + BOOST_ASSERT(boxCorners[indexX] < boxCorners[indexW]); + } + + unsigned int numClassesWithBg = desc.m_NumClasses + 1; + + // Perform Non Max Suppression. + if (desc.m_UseRegularNms) + { + // Perform Regular NMS. + // For each class, perform NMS and select max detection numbers of the highest score across all classes. 
+ std::vector classScores(numBoxes); + std::vectorselectedBoxesAfterNms; + std::vector selectedScoresAfterNms; + std::vector selectedClasses; + + for (unsigned int c = 0; c < desc.m_NumClasses; ++c) + { + // For each boxes, get scores of the boxes for the class c. + for (unsigned int i = 0; i < numBoxes; ++i) + { + classScores[i] = scores[i * numClassesWithBg + c + 1]; + } + std::vector selectedIndices = NonMaxSuppression(numBoxes, boxCorners, classScores, + desc.m_NmsScoreThreshold, + desc.m_MaxClassesPerDetection, + desc.m_NmsIouThreshold); + + for (unsigned int i = 0; i < selectedIndices.size(); ++i) + { + selectedBoxesAfterNms.push_back(selectedIndices[i]); + selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]); + selectedClasses.push_back(c); + } + } + + // Select max detection numbers of the highest score across all classes + unsigned int numSelected = boost::numeric_cast(selectedBoxesAfterNms.size()); + unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected); + + // Sort the max scores among the selected indices. + std::vector outputIndices = GenerateRangeK(numSelected); + TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected); + + AllocateOutputData(numOutput, numSelected, boxCorners, outputIndices, + selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms, + detectionBoxes, detectionScores, detectionClasses, numDetections); + } + else + { + // Perform Fast NMS. + // Select max scores of boxes and perform NMS on max scores, + // select max detection numbers of the highest score + unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses); + std::vector maxScores; + std::vectorboxIndices; + std::vectormaxScoreClasses; + + for (unsigned int box = 0; box < numBoxes; ++box) + { + unsigned int scoreIndex = box * numClassesWithBg + 1; + + // Get the max scores of the box. 
+ std::vector maxScoreIndices = GenerateRangeK(desc.m_NumClasses); + TopKSort(numClassesPerBox, maxScoreIndices.data(), scores + scoreIndex, desc.m_NumClasses); + + for (unsigned int i = 0; i < numClassesPerBox; ++i) + { + maxScores.push_back(scores[scoreIndex + maxScoreIndices[i]]); + maxScoreClasses.push_back(maxScoreIndices[i]); + boxIndices.push_back(box); + } + } + + // Perform NMS on max scores + std::vector selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores, + desc.m_NmsScoreThreshold, + desc.m_MaxDetections, + desc.m_NmsIouThreshold); + + unsigned int numSelected = boost::numeric_cast(selectedIndices.size()); + unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected); + + AllocateOutputData(numOutput, numSelected, boxCorners, selectedIndices, + boxIndices, maxScoreClasses, maxScores, + detectionBoxes, detectionScores, detectionClasses, numDetections); + } +} + +} // namespace armnn diff --git a/src/backends/reference/workloads/DetectionPostProcess.hpp b/src/backends/reference/workloads/DetectionPostProcess.hpp new file mode 100644 index 0000000000..06e9e15781 --- /dev/null +++ b/src/backends/reference/workloads/DetectionPostProcess.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "armnn/Tensor.hpp" +#include "armnn/Descriptors.hpp" + +namespace armnn +{ + +void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, + const TensorInfo& scoresInfo, + const TensorInfo& anchorsInfo, + const TensorInfo& detectionBoxesInfo, + const TensorInfo& detectionClassesInfo, + const TensorInfo& detectionScoresInfo, + const TensorInfo& numDetectionsInfo, + const DetectionPostProcessDescriptor& desc, + const float* boxEncodings, + const float* scores, + const float* anchors, + float* detectionBoxes, + float* detectionClasses, + float* detectionScores, + float* numDetections); + +} // namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp new file mode 100644 index 0000000000..ddab046f9c --- /dev/null +++ b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefDetectionPostProcessFloat32Workload.hpp" + +#include "DetectionPostProcess.hpp" +#include "Profiling.hpp" +#include "RefWorkloadUtils.hpp" + +namespace armnn +{ + +RefDetectionPostProcessFloat32Workload::RefDetectionPostProcessFloat32Workload( + const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Anchors(std::make_unique(*(descriptor.m_Anchors))) {} + +void RefDetectionPostProcessFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute"); + + const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); + const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); + const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); + const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); + + const float* boxEncodings = GetInputTensorDataFloat(0, m_Data); + const float* scores = GetInputTensorDataFloat(1, m_Data); + const float* anchors = m_Anchors->GetConstTensor(); + + float* detectionBoxes = GetOutputTensorData(0, m_Data); + float* detectionClasses = GetOutputTensorData(1, m_Data); + float* detectionScores = GetOutputTensorData(2, m_Data); + float* numDetections = GetOutputTensorData(3, m_Data); + + DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, + detectionBoxesInfo, detectionClassesInfo, + detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, + boxEncodings, scores, anchors, detectionBoxes, + detectionClasses, detectionScores, numDetections); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp 
b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp new file mode 100644 index 0000000000..9f2a697ada --- /dev/null +++ b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +class RefDetectionPostProcessFloat32Workload : public Float32Workload +{ +public: + explicit RefDetectionPostProcessFloat32Workload(const DetectionPostProcessQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Anchors; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp new file mode 100644 index 0000000000..ccdaf87c9a --- /dev/null +++ b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp @@ -0,0 +1,52 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefDetectionPostProcessUint8Workload.hpp" + +#include "DetectionPostProcess.hpp" +#include "Profiling.hpp" +#include "RefWorkloadUtils.hpp" + +namespace armnn +{ + +RefDetectionPostProcessUint8Workload::RefDetectionPostProcessUint8Workload( + const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8ToFloat32Workload(descriptor, info), + m_Anchors(std::make_unique(*(descriptor.m_Anchors))) {} + +void RefDetectionPostProcessUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute"); + + const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); + const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); + const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); + const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); + + const uint8_t* boxEncodingsData = GetInputTensorDataU8(0, m_Data); + const uint8_t* scoresData = GetInputTensorDataU8(1, m_Data); + const uint8_t* anchorsData = m_Anchors->GetConstTensor(); + + auto boxEncodings = Dequantize(boxEncodingsData, boxEncodingsInfo); + auto scores = Dequantize(scoresData, scoresInfo); + auto anchors = Dequantize(anchorsData, anchorsInfo); + + float* detectionBoxes = GetOutputTensorData(0, m_Data); + float* detectionClasses = GetOutputTensorData(1, m_Data); + float* detectionScores = GetOutputTensorData(2, m_Data); + float* numDetections = GetOutputTensorData(3, m_Data); + + DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, + detectionBoxesInfo, detectionClassesInfo, + detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, + boxEncodings.data(), scores.data(), anchors.data(), + 
detectionBoxes, detectionClasses, detectionScores, numDetections); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp new file mode 100644 index 0000000000..91590f57bd --- /dev/null +++ b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +class RefDetectionPostProcessUint8Workload : public Uint8ToFloat32Workload +{ +public: + explicit RefDetectionPostProcessUint8Workload(const DetectionPostProcessQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Anchors; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp index d9f4dbb342..2156388ba2 100644 --- a/src/backends/reference/workloads/RefWorkloads.hpp +++ b/src/backends/reference/workloads/RefWorkloads.hpp @@ -38,6 +38,8 @@ #include "ResizeBilinear.hpp" #include "RefNormalizationFloat32Workload.hpp" #include "RefDepthwiseConvolution2dFloat32Workload.hpp" +#include "RefDetectionPostProcessFloat32Workload.hpp" +#include "RefDetectionPostProcessUint8Workload.hpp" #include "RefPooling2dUint8Workload.hpp" #include "BatchNormImpl.hpp" #include "Activation.hpp" -- cgit v1.2.1