From 6331f91a4a1cb1ad16c569d98bb9ddf704788464 Mon Sep 17 00:00:00 2001 From: Aron Virginas-Tar Date: Mon, 3 Jun 2019 17:10:02 +0100 Subject: IVGCVSW-2971 Support QSymm16 for DetectionPostProcess workloads Signed-off-by: Aron Virginas-Tar Change-Id: I8af45afe851a9ccbf8bce54727147fcd52ac9a1f --- src/backends/reference/workloads/CMakeLists.txt | 6 +- .../reference/workloads/DetectionPostProcess.cpp | 111 ++++++++++++++++----- .../reference/workloads/DetectionPostProcess.hpp | 24 ++++- .../RefDetectionPostProcessFloat32Workload.cpp | 48 --------- .../RefDetectionPostProcessFloat32Workload.hpp | 25 ----- .../RefDetectionPostProcessUint8Workload.cpp | 52 ---------- .../RefDetectionPostProcessUint8Workload.hpp | 25 ----- .../workloads/RefDetectionPostProcessWorkload.cpp | 50 ++++++++++ .../workloads/RefDetectionPostProcessWorkload.hpp | 25 +++++ src/backends/reference/workloads/RefWorkloads.hpp | 3 +- 10 files changed, 183 insertions(+), 186 deletions(-) delete mode 100644 src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp delete mode 100644 src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp delete mode 100644 src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp delete mode 100644 src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp create mode 100644 src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp (limited to 'src/backends/reference/workloads') diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt index cdca22da31..25d4b28366 100644 --- a/src/backends/reference/workloads/CMakeLists.txt +++ b/src/backends/reference/workloads/CMakeLists.txt @@ -60,10 +60,8 @@ list(APPEND armnnRefBackendWorkloads_sources RefDepthwiseConvolution2dWorkload.hpp RefDequantizeWorkload.cpp RefDequantizeWorkload.hpp - 
RefDetectionPostProcessUint8Workload.cpp - RefDetectionPostProcessUint8Workload.hpp - RefDetectionPostProcessFloat32Workload.cpp - RefDetectionPostProcessFloat32Workload.hpp + RefDetectionPostProcessWorkload.cpp + RefDetectionPostProcessWorkload.hpp RefFakeQuantizationFloat32Workload.cpp RefFakeQuantizationFloat32Workload.hpp RefFloorWorkload.cpp diff --git a/src/backends/reference/workloads/DetectionPostProcess.cpp b/src/backends/reference/workloads/DetectionPostProcess.cpp index d3790f22ab..d475dd8ac0 100644 --- a/src/backends/reference/workloads/DetectionPostProcess.cpp +++ b/src/backends/reference/workloads/DetectionPostProcess.cpp @@ -13,7 +13,7 @@ #include #include -namespace +namespace armnn { std::vector GenerateRangeK(unsigned int k) @@ -48,9 +48,12 @@ float IntersectionOverUnion(const float* boxI, const float* boxJ) return areaIntersection / areaUnion; } -std::vector NonMaxSuppression(unsigned int numBoxes, const std::vector& boxCorners, - const std::vector& scores, float nmsScoreThreshold, - unsigned int maxDetection, float nmsIouThreshold) +std::vector NonMaxSuppression(unsigned int numBoxes, + const std::vector& boxCorners, + const std::vector& scores, + float nmsScoreThreshold, + unsigned int maxDetection, + float nmsIouThreshold) { // Select boxes that have scores above a given threshold. std::vector scoresAboveThreshold; @@ -67,7 +70,7 @@ std::vector NonMaxSuppression(unsigned int numBoxes, const std::ve // Sort the indices based on scores. unsigned int numAboveThreshold = boost::numeric_cast(scoresAboveThreshold.size()); std::vector sortedIndices = GenerateRangeK(numAboveThreshold); - TopKSort(numAboveThreshold,sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold); + TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold); // Number of output cannot be more than max detections specified in the option. 
unsigned int numOutput = std::min(maxDetection, numAboveThreshold); @@ -98,10 +101,17 @@ std::vector NonMaxSuppression(unsigned int numBoxes, const std::ve return outputIndices; } -void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector& boxCorners, - const std::vector& outputIndices, const std::vector& selectedBoxes, - const std::vector& selectedClasses, const std::vector& selectedScores, - float* detectionBoxes, float* detectionScores, float* detectionClasses, float* numDetections) +void AllocateOutputData(unsigned int numOutput, + unsigned int numSelected, + const std::vector& boxCorners, + const std::vector& outputIndices, + const std::vector& selectedBoxes, + const std::vector& selectedClasses, + const std::vector& selectedScores, + float* detectionBoxes, + float* detectionScores, + float* detectionClasses, + float* numDetections) { for (unsigned int i = 0; i < numOutput; ++i) { @@ -129,11 +139,6 @@ void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const numDetections[0] = boost::numeric_cast(numSelected); } -} // anonymous namespace - -namespace armnn -{ - void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, const TensorInfo& scoresInfo, const TensorInfo& anchorsInfo, @@ -142,9 +147,9 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, const TensorInfo& detectionScoresInfo, const TensorInfo& numDetectionsInfo, const DetectionPostProcessDescriptor& desc, - const float* boxEncodings, - const float* scores, - const float* anchors, + Decoder& boxEncodings, + Decoder& scores, + Decoder& anchors, float* detectionBoxes, float* detectionClasses, float* detectionScores, @@ -153,17 +158,51 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format, // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax) std::vector 
boxCorners(boxEncodingsInfo.GetNumElements()); - unsigned int numBoxes = boxEncodingsInfo.GetShape()[1]; + + const unsigned int numBoxes = boxEncodingsInfo.GetShape()[1]; + const unsigned int numScores = scoresInfo.GetNumElements(); + for (unsigned int i = 0; i < numBoxes; ++i) { + // Y + float boxEncodingY = boxEncodings.Get(); + float anchorY = anchors.Get(); + + ++boxEncodings; + ++anchors; + + // X + float boxEncodingX = boxEncodings.Get(); + float anchorX = anchors.Get(); + + ++boxEncodings; + ++anchors; + + // H + float boxEncodingH = boxEncodings.Get(); + float anchorH = anchors.Get(); + + ++boxEncodings; + ++anchors; + + // W + float boxEncodingW = boxEncodings.Get(); + float anchorW = anchors.Get(); + + ++boxEncodings; + ++anchors; + + float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY; + float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX; + + float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH; + float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW; + unsigned int indexY = i * 4; unsigned int indexX = indexY + 1; unsigned int indexH = indexX + 1; unsigned int indexW = indexH + 1; - float yCentre = boxEncodings[indexY] / desc.m_ScaleY * anchors[indexH] + anchors[indexY]; - float xCentre = boxEncodings[indexX] / desc.m_ScaleX * anchors[indexW] + anchors[indexX]; - float halfH = 0.5f * expf(boxEncodings[indexH] / desc.m_ScaleH) * anchors[indexH]; - float halfW = 0.5f * expf(boxEncodings[indexW] / desc.m_ScaleW) * anchors[indexW]; + // ymin boxCorners[indexY] = yCentre - halfH; // xmin @@ -179,14 +218,29 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, unsigned int numClassesWithBg = desc.m_NumClasses + 1; + // Decode scores + std::vector decodedScores; + decodedScores.reserve(numScores); + + for (unsigned int i = 0u; i < numScores; ++i) + { + decodedScores.emplace_back(scores.Get()); + ++scores; + } + // Perform Non Max Suppression. if (desc.m_UseRegularNms) { // Perform Regular NMS. 
// For each class, perform NMS and select max detection numbers of the highest score across all classes. std::vector classScores(numBoxes); - std::vectorselectedBoxesAfterNms; + + std::vector selectedBoxesAfterNms; + selectedBoxesAfterNms.reserve(numBoxes); + std::vector selectedScoresAfterNms; - + selectedScoresAfterNms.reserve(numScores); + std::vector selectedClasses; for (unsigned int c = 0; c < desc.m_NumClasses; ++c) @@ -194,9 +248,11 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, // For each boxes, get scores of the boxes for the class c. for (unsigned int i = 0; i < numBoxes; ++i) { - classScores[i] = scores[i * numClassesWithBg + c + 1]; + classScores[i] = decodedScores[i * numClassesWithBg + c + 1]; } - std::vector selectedIndices = NonMaxSuppression(numBoxes, boxCorners, classScores, + std::vector selectedIndices = NonMaxSuppression(numBoxes, + boxCorners, + classScores, desc.m_NmsScoreThreshold, desc.m_DetectionsPerClass, desc.m_NmsIouThreshold); @@ -237,11 +293,12 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, // Get the max scores of the box. 
std::vector maxScoreIndices = GenerateRangeK(desc.m_NumClasses); - TopKSort(numClassesPerBox, maxScoreIndices.data(), scores + scoreIndex, desc.m_NumClasses); + TopKSort(numClassesPerBox, maxScoreIndices.data(), + decodedScores.data() + scoreIndex, desc.m_NumClasses); for (unsigned int i = 0; i < numClassesPerBox; ++i) { - maxScores.push_back(scores[scoreIndex + maxScoreIndices[i]]); + maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]); maxScoreClasses.push_back(maxScoreIndices[i]); boxIndices.push_back(box); } diff --git a/src/backends/reference/workloads/DetectionPostProcess.hpp b/src/backends/reference/workloads/DetectionPostProcess.hpp index 06e9e15781..8700a53317 100644 --- a/src/backends/reference/workloads/DetectionPostProcess.hpp +++ b/src/backends/reference/workloads/DetectionPostProcess.hpp @@ -7,6 +7,10 @@ #include "armnn/Tensor.hpp" #include "armnn/Descriptors.hpp" +#include "Decoders.hpp" + +#include + namespace armnn { @@ -18,12 +22,26 @@ void DetectionPostProcess(const TensorInfo& boxEncodingsInfo, const TensorInfo& detectionScoresInfo, const TensorInfo& numDetectionsInfo, const DetectionPostProcessDescriptor& desc, - const float* boxEncodings, - const float* scores, - const float* anchors, + Decoder& boxEncodings, + Decoder& scores, + Decoder& anchors, float* detectionBoxes, float* detectionClasses, float* detectionScores, float* numDetections); +void TopKSort(unsigned int k, + unsigned int* indices, + const float* values, + unsigned int numElement); + +float IntersectionOverUnion(const float* boxI, const float* boxJ); + +std::vector NonMaxSuppression(unsigned int numBoxes, + const std::vector& boxCorners, + const std::vector& scores, + float nmsScoreThreshold, + unsigned int maxDetection, + float nmsIouThreshold); + } // namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp deleted file mode 100644 index 
ddab046f9c..0000000000 --- a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefDetectionPostProcessFloat32Workload.hpp" - -#include "DetectionPostProcess.hpp" -#include "Profiling.hpp" -#include "RefWorkloadUtils.hpp" - -namespace armnn -{ - -RefDetectionPostProcessFloat32Workload::RefDetectionPostProcessFloat32Workload( - const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info), - m_Anchors(std::make_unique(*(descriptor.m_Anchors))) {} - -void RefDetectionPostProcessFloat32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute"); - - const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); - const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); - const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); - - const float* boxEncodings = GetInputTensorDataFloat(0, m_Data); - const float* scores = GetInputTensorDataFloat(1, m_Data); - const float* anchors = m_Anchors->GetConstTensor(); - - float* detectionBoxes = GetOutputTensorData(0, m_Data); - float* detectionClasses = GetOutputTensorData(1, m_Data); - float* detectionScores = GetOutputTensorData(2, m_Data); - float* numDetections = GetOutputTensorData(3, m_Data); - - DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, - detectionBoxesInfo, detectionClassesInfo, - detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, - boxEncodings, scores, anchors, detectionBoxes, - 
detectionClasses, detectionScores, numDetections); -} - -} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp deleted file mode 100644 index 9f2a697ada..0000000000 --- a/src/backends/reference/workloads/RefDetectionPostProcessFloat32Workload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include -#include - -namespace armnn -{ - -class RefDetectionPostProcessFloat32Workload : public Float32Workload -{ -public: - explicit RefDetectionPostProcessFloat32Workload(const DetectionPostProcessQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr m_Anchors; -}; - -} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp deleted file mode 100644 index ccdaf87c9a..0000000000 --- a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "RefDetectionPostProcessUint8Workload.hpp" - -#include "DetectionPostProcess.hpp" -#include "Profiling.hpp" -#include "RefWorkloadUtils.hpp" - -namespace armnn -{ - -RefDetectionPostProcessUint8Workload::RefDetectionPostProcessUint8Workload( - const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8ToFloat32Workload(descriptor, info), - m_Anchors(std::make_unique(*(descriptor.m_Anchors))) {} - -void RefDetectionPostProcessUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessUint8Workload_Execute"); - - const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); - const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); - const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); - const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); - const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); - const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); - - const uint8_t* boxEncodingsData = GetInputTensorDataU8(0, m_Data); - const uint8_t* scoresData = GetInputTensorDataU8(1, m_Data); - const uint8_t* anchorsData = m_Anchors->GetConstTensor(); - - auto boxEncodings = Dequantize(boxEncodingsData, boxEncodingsInfo); - auto scores = Dequantize(scoresData, scoresInfo); - auto anchors = Dequantize(anchorsData, anchorsInfo); - - float* detectionBoxes = GetOutputTensorData(0, m_Data); - float* detectionClasses = GetOutputTensorData(1, m_Data); - float* detectionScores = GetOutputTensorData(2, m_Data); - float* numDetections = GetOutputTensorData(3, m_Data); - - DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, - detectionBoxesInfo, detectionClassesInfo, - detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, - boxEncodings.data(), scores.data(), anchors.data(), - 
detectionBoxes, detectionClasses, detectionScores, numDetections); -} - -} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp deleted file mode 100644 index 91590f57bd..0000000000 --- a/src/backends/reference/workloads/RefDetectionPostProcessUint8Workload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include -#include - -namespace armnn -{ - -class RefDetectionPostProcessUint8Workload : public Uint8ToFloat32Workload -{ -public: - explicit RefDetectionPostProcessUint8Workload(const DetectionPostProcessQueueDescriptor& descriptor, - const WorkloadInfo& info); - virtual void Execute() const override; - -private: - std::unique_ptr m_Anchors; -}; - -} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp new file mode 100644 index 0000000000..db24cc53e4 --- /dev/null +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefDetectionPostProcessWorkload.hpp" + +#include "Decoders.hpp" +#include "DetectionPostProcess.hpp" +#include "Profiling.hpp" +#include "RefWorkloadUtils.hpp" + +namespace armnn +{ + +RefDetectionPostProcessWorkload::RefDetectionPostProcessWorkload( + const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload(descriptor, info), + m_Anchors(std::make_unique(*(descriptor.m_Anchors))) {} + +void RefDetectionPostProcessWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDetectionPostProcessWorkload_Execute"); + + const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& scoresInfo = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& anchorsInfo = GetTensorInfo(m_Anchors.get()); + + const TensorInfo& detectionBoxesInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]); + const TensorInfo& detectionScoresInfo = GetTensorInfo(m_Data.m_Outputs[2]); + const TensorInfo& numDetectionsInfo = GetTensorInfo(m_Data.m_Outputs[3]); + + auto boxEncodings = MakeDecoder(boxEncodingsInfo, m_Data.m_Inputs[0]->Map()); + auto scores = MakeDecoder(scoresInfo, m_Data.m_Inputs[1]->Map()); + auto anchors = MakeDecoder(anchorsInfo, m_Anchors->Map(false)); + + float* detectionBoxes = GetOutputTensorData(0, m_Data); + float* detectionClasses = GetOutputTensorData(1, m_Data); + float* detectionScores = GetOutputTensorData(2, m_Data); + float* numDetections = GetOutputTensorData(3, m_Data); + + DetectionPostProcess(boxEncodingsInfo, scoresInfo, anchorsInfo, + detectionBoxesInfo, detectionClassesInfo, + detectionScoresInfo, numDetectionsInfo, m_Data.m_Parameters, + *boxEncodings, *scores, *anchors, detectionBoxes, + detectionClasses, detectionScores, numDetections); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp 
b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp new file mode 100644 index 0000000000..799d0c6219 --- /dev/null +++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +class RefDetectionPostProcessWorkload : public BaseWorkload +{ +public: + explicit RefDetectionPostProcessWorkload(const DetectionPostProcessQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Anchors; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp index 7ccd4efc54..8d99b69685 100644 --- a/src/backends/reference/workloads/RefWorkloads.hpp +++ b/src/backends/reference/workloads/RefWorkloads.hpp @@ -31,8 +31,7 @@ #include "RefResizeBilinearFloat32Workload.hpp" #include "ResizeBilinear.hpp" #include "RefNormalizationFloat32Workload.hpp" -#include "RefDetectionPostProcessFloat32Workload.hpp" -#include "RefDetectionPostProcessUint8Workload.hpp" +#include "RefDetectionPostProcessWorkload.hpp" #include "RefPooling2dUint8Workload.hpp" #include "BatchNormImpl.hpp" #include "Activation.hpp" -- cgit v1.2.1