From 23c26277086c78704a17f0dae86da947816320c0 Mon Sep 17 00:00:00 2001
From: George Gekov <george.gekov@arm.com>
Date: Mon, 16 Aug 2021 11:32:10 +0100
Subject: MLECO-2079 Adding the C++ KWS example

Signed-off-by: Eanna O Cathain <eanna.ocathain@arm.com>
Change-Id: I81899bbfaada32f478c2e2fc6441eabb94d8d0fc
---
 samples/KeywordSpotting/include/Decoder.hpp        | 32 ++++++++
 .../KeywordSpotting/include/DsCNNPreprocessor.hpp  | 39 ++++++++++
 samples/KeywordSpotting/include/DsCnnMfcc.hpp      | 20 +++++
 .../include/KeywordSpottingPipeline.hpp            | 91 ++++++++++++++++++++++
 4 files changed, 182 insertions(+)
 create mode 100644 samples/KeywordSpotting/include/Decoder.hpp
 create mode 100644 samples/KeywordSpotting/include/DsCNNPreprocessor.hpp
 create mode 100644 samples/KeywordSpotting/include/DsCnnMfcc.hpp
 create mode 100644 samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp

(limited to 'samples/KeywordSpotting/include')
diff --git a/samples/KeywordSpotting/include/Decoder.hpp b/samples/KeywordSpotting/include/Decoder.hpp
new file mode 100644
index 0000000000..aca68312bc
--- /dev/null
+++ b/samples/KeywordSpotting/include/Decoder.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+# pragma once
+
+#include <string>
+#include <map>
+#include "ArmnnNetworkExecutor.hpp"
+
+namespace kws 
+{
+
+/**
+* @brief Decodes quantised last layer of model output.
+*        The quantisation offset and scale are stored at construction.
+*/
+class Decoder 
+{
+private:
+    int quantisationOffset;     // Quantisation offset, initialised by the constructor
+    float quantisationScale;    // Quantisation scale, initialised by the constructor
+
+public:
+    // Stores the two quantisation parameters; the constructor body itself is empty.
+    Decoder(int quantisationOffset, float quantisationScale) : quantisationOffset(quantisationOffset),
+                                                               quantisationScale(quantisationScale) {}
+    // Declared only; defined outside this header. NOTE(review): meaning of the (int, float) pair is not shown here - confirm.
+    std::pair<int, float> decodeOutput(std::vector<int8_t>& modelOutput);
+
+};
+} // namespace kws
\ No newline at end of file
diff --git a/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp b/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp
new file mode 100644
index 0000000000..b635d1a41e
--- /dev/null
+++ b/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#ifndef KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP
+#define KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP
+
+#include <numeric>
+#include "DsCnnMfcc.hpp"
+
+namespace kws 
+{
+class DsCNNPreprocessor
+{
+public:
+    DsCNNPreprocessor(uint32_t windowLen, uint32_t windowStride,
+                      std::unique_ptr<DsCnnMFCC> mfccInst);    // mfccInst is passed by value, so ownership moves in
+
+    /**
+    * @brief       Calculates the features required from audio data and
+    *              returns them as a vector of int8_t values.
+    *              NOTE(review): the individual processing steps are in the
+    *              definition, which is outside this header - confirm there.
+    * @param[in]   audioData     pointer to the first element of audio data
+    * @param[in]   dataSize      size of audioData (presumably element count)
+    * @param[in]   quantOffset, quantScale   presumably quantisation parameters
+    * @return      the calculated feature values.
+    */
+    std::vector<int8_t> Invoke(const float* audioData, 
+                               size_t dataSize,
+                               int quantOffset,
+                               float quantScale) ;
+    // NOTE: no access specifier follows the 'public:' above, so the data members below are public.
+    uint32_t m_windowLen;       // Window length for MFCC
+    uint32_t m_windowStride;    // Window stride len for MFCC
+    std::unique_ptr<MFCC> m_mfcc;    // Held as base-class MFCC; presumably initialised from mfccInst - confirm
+};
+} // namespace kws
+#endif //KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP
diff --git a/samples/KeywordSpotting/include/DsCnnMfcc.hpp b/samples/KeywordSpotting/include/DsCnnMfcc.hpp
new file mode 100644
index 0000000000..851e010b22
--- /dev/null
+++ b/samples/KeywordSpotting/include/DsCnnMfcc.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "MFCC.hpp"
+
+/* Class to provide DS-CNN specific MFCC calculation requirements; this header adds no members beyond those below. */
+class DsCnnMFCC : public MFCC 
+{
+
+public:
+    // Constructs the MFCC base class from params, forwarded unchanged; the body is empty.
+    explicit DsCnnMFCC(MfccParams& params)
+        :  MFCC(params)
+    {}
+    DsCnnMFCC()  = delete;     // No default construction: MfccParams must be supplied
+    ~DsCnnMFCC() = default;    // Compiler-generated destructor
+};
diff --git a/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp b/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp
new file mode 100644
index 0000000000..bd47987a59
--- /dev/null
+++ b/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ArmnnNetworkExecutor.hpp"
+#include "Decoder.hpp"
+#include "MFCC.hpp"
+#include "DsCNNPreprocessor.hpp"
+
+namespace kws
+{
+/**
+ * Generic Keyword Spotting pipeline with 3 steps: data pre-processing, inference execution and inference
+ * result post-processing.
+ * The three steps are exposed as PreProcessing, Inference and PostProcessing.
+ */
+class KWSPipeline
+{
+public:
+
+    /** Creates keyword spotting pipeline with given network executor, decoder and pre-processor.
+     * @param executor - unique pointer to inference runner
+     * @param decoder - unique pointer to inference results decoder
+     * @param preProcessor - unique pointer to audio pre-processor
+     */
+    KWSPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> executor,
+                std::unique_ptr<Decoder> decoder,
+                std::unique_ptr<DsCNNPreprocessor> preProcessor);
+
+    /**
+     * @brief Standard audio pre-processing implementation.
+     *
+     * Preprocesses and prepares the data for inference by
+     * extracting the MFCC features.
+     * @param[in] audio - the raw audio data
+     * @return pre-processed data as a vector of int8_t values
+     */
+
+    std::vector<int8_t> PreProcessing(std::vector<float>& audio);
+
+    /**
+     * @brief Executes inference
+     *
+     * Calls inference runner provided during instance construction.
+     *
+     * @param[in] preprocessedData - input inference data. Data type should be aligned with input tensor.
+     * @param[out] result - raw inference results.
+     */
+    void Inference(const std::vector<int8_t>& preprocessedData, common::InferenceResults<int8_t>& result);
+
+    /**
+     * @brief Standard inference results post-processing implementation.
+     *
+     * Decodes inference results using decoder provided during construction.
+     * @param[in] inferenceResults - inference results to be decoded.
+     * @param[in] labels - the words we use for the model
+     * @param[in] callback - callable taking (int, std::string&, float); presumably receives the decoded result - confirm
+     */
+    void PostProcessing(common::InferenceResults<int8_t>& inferenceResults,
+                        std::map<int, std::string>& labels,
+                        const std::function<void (int, std::string&, float)>& callback);
+
+    /**
+     * @brief Get the number of samples for the pipeline input
+     *
+     * @return - number of samples for the pipeline
+     */
+    int getInputSamplesSize();
+
+protected:
+    std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> m_executor;    // Inference runner
+    std::unique_ptr<Decoder> m_decoder;                                  // Inference results decoder
+    std::unique_ptr<DsCNNPreprocessor> m_preProcessor;                   // Audio pre-processor
+};
+
+using IPipelinePtr = std::unique_ptr<kws::KWSPipeline>;    // Owning pointer to a KWSPipeline
+
+/**
+ * Constructs keyword spotting pipeline based on configuration provided.
+ *
+ * @param[in] config - keyword spotting pipeline configuration.
+ *
+ * @return unique pointer to the keyword spotting pipeline.
+ * NOTE(review): declared only; the definition is outside this header.
+ */
+IPipelinePtr CreatePipeline(common::PipelineOptions& config);
+
+};// namespace kws
\ No newline at end of file
-- 
cgit v1.2.1