From 23c26277086c78704a17f0dae86da947816320c0 Mon Sep 17 00:00:00 2001 From: George Gekov Date: Mon, 16 Aug 2021 11:32:10 +0100 Subject: MLECO-2079 Adding the C++ KWS example Signed-off-by: Eanna O Cathain Change-Id: I81899bbfaada32f478c2e2fc6441eabb94d8d0fc --- samples/KeywordSpotting/include/Decoder.hpp | 32 ++++++++ .../KeywordSpotting/include/DsCNNPreprocessor.hpp | 39 ++++++++++ samples/KeywordSpotting/include/DsCnnMfcc.hpp | 20 +++++ .../include/KeywordSpottingPipeline.hpp | 91 ++++++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 samples/KeywordSpotting/include/Decoder.hpp create mode 100644 samples/KeywordSpotting/include/DsCNNPreprocessor.hpp create mode 100644 samples/KeywordSpotting/include/DsCnnMfcc.hpp create mode 100644 samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp (limited to 'samples/KeywordSpotting/include') diff --git a/samples/KeywordSpotting/include/Decoder.hpp b/samples/KeywordSpotting/include/Decoder.hpp new file mode 100644 index 0000000000..aca68312bc --- /dev/null +++ b/samples/KeywordSpotting/include/Decoder.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +# pragma once + +#include +#include +#include "ArmnnNetworkExecutor.hpp" + +namespace kws +{ + +/** +* @brief Decodes quantised last layer of model output +* Stores the quantisation offset and scale given to the constructor; decodeOutput is only declared in this header. +*/ +class Decoder +{ +private: + int quantisationOffset; + float quantisationScale; + +public: + + Decoder(int quantisationOffset, float quantisationScale) : quantisationOffset(quantisationOffset), + quantisationScale(quantisationScale) {} + + std::pair decodeOutput(std::vector& modelOutput); + +}; +} // namespace kws \ No newline at end of file diff --git a/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp b/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp new file mode 100644 index 0000000000..b635d1a41e --- /dev/null +++ b/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp @@ -0,0 +1,39 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#ifndef KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP +#define KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP + +#include +#include "DsCnnMfcc.hpp" + +namespace kws +{ +class DsCNNPreprocessor +{ +public: + DsCNNPreprocessor(uint32_t windowLen, uint32_t windowStride, + std::unique_ptr mfccInst); + + /** + * @brief Calculates the features required from audio data. This + * includes MFCC, first and second order deltas, + * normalisation and finally, quantisation. The tensor is + * populated with features from a given window placed along + * a single row. + * @param[in] audioData pointer to the first element of audio data + * @param[in] dataSize size of the audio data (presumably the number of float samples - TODO confirm); quantOffset and quantScale are the caller-supplied quantisation parameters + * @return std::vector holding the computed features (the declaration returns a vector, not a bool; element type is not visible here). 
+ */ + std::vector Invoke(const float* audioData, + size_t dataSize, + int quantOffset, + float quantScale) ; + + uint32_t m_windowLen; // Window length for MFCC + uint32_t m_windowStride; // Window stride length for MFCC + std::unique_ptr m_mfcc; +}; +} // namespace kws +#endif //KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP diff --git a/samples/KeywordSpotting/include/DsCnnMfcc.hpp b/samples/KeywordSpotting/include/DsCnnMfcc.hpp new file mode 100644 index 0000000000..851e010b22 --- /dev/null +++ b/samples/KeywordSpotting/include/DsCnnMfcc.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "MFCC.hpp" + +/* Class to provide DS-CNN specific MFCC calculation requirements; in this header it only forwards MfccParams to the MFCC base constructor. */ +class DsCnnMFCC : public MFCC +{ + +public: + + explicit DsCnnMFCC(MfccParams& params) + : MFCC(params) + {} + DsCnnMFCC() = delete; + ~DsCnnMFCC() = default; +}; diff --git a/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp b/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp new file mode 100644 index 0000000000..bd47987a59 --- /dev/null +++ b/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp @@ -0,0 +1,91 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ArmnnNetworkExecutor.hpp" +#include "Decoder.hpp" +#include "MFCC.hpp" +#include "DsCNNPreprocessor.hpp" + +namespace kws +{ +/** + * Generic Keyword Spotting pipeline with 3 steps: data pre-processing, inference execution and inference + * result post-processing. + * + */ +class KWSPipeline +{ +public: + + /** + * Creates keyword spotting pipeline with given network executor, decoder and pre-processor (preProcessor). 
+ * @param executor - unique pointer to inference runner + * @param decoder - unique pointer to inference results decoder + */ + KWSPipeline(std::unique_ptr> executor, + std::unique_ptr decoder, + std::unique_ptr preProcessor); + + /** + * @brief Standard audio pre-processing implementation. + * + * Preprocesses and prepares the data for inference by + * extracting the MFCC features. + * + * @param[in] audio - the raw audio data + */ + + std::vector PreProcessing(std::vector& audio); + + /** + * @brief Executes inference + * + * Calls inference runner provided during instance construction. + * + * @param[in] preprocessedData - input inference data. Data type should be aligned with input tensor. + * @param[out] result - raw inference results. + */ + void Inference(const std::vector& preprocessedData, common::InferenceResults& result); + + /** + * @brief Standard inference results post-processing implementation. + * + * Decodes inference results using decoder provided during construction. NOTE(review): the callback parameter is undocumented; presumably it is invoked with the decoded result - confirm against the implementation. + * + * @param[in] inferenceResults - inference results to be decoded. + * @param[in] labels - the words we use for the model + */ + void PostProcessing(common::InferenceResults& inferenceResults, + std::map& labels, + const std::function& callback); + + /** + * @brief Get the number of samples for the pipeline input + * + * @return - number of samples for the pipeline + */ + int getInputSamplesSize(); + +protected: + std::unique_ptr> m_executor; + std::unique_ptr m_decoder; + std::unique_ptr m_preProcessor; +}; + +using IPipelinePtr = std::unique_ptr; + +/** + * Constructs keyword spotting pipeline based on configuration provided. + * + * @param[in] config - keyword spotting pipeline configuration. + * (CreatePipeline takes no labels argument; labels are passed to KWSPipeline::PostProcessing instead.) + * + * @return IPipelinePtr (unique pointer) to the created pipeline. + */ +IPipelinePtr CreatePipeline(common::PipelineOptions& config); + +};// namespace kws \ No newline at end of file -- cgit v1.2.1