aboutsummaryrefslogtreecommitdiff
path: root/samples/KeywordSpotting/include
diff options
context:
space:
mode:
Diffstat (limited to 'samples/KeywordSpotting/include')
-rw-r--r--samples/KeywordSpotting/include/Decoder.hpp32
-rw-r--r--samples/KeywordSpotting/include/DsCNNPreprocessor.hpp39
-rw-r--r--samples/KeywordSpotting/include/DsCnnMfcc.hpp20
-rw-r--r--samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp91
4 files changed, 182 insertions, 0 deletions
diff --git a/samples/KeywordSpotting/include/Decoder.hpp b/samples/KeywordSpotting/include/Decoder.hpp
new file mode 100644
index 0000000000..aca68312bc
--- /dev/null
+++ b/samples/KeywordSpotting/include/Decoder.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+# pragma once
+
+#include <string>
+#include <map>
+#include "ArmnnNetworkExecutor.hpp"
+
+namespace kws
+{
+
+/**
+* @brief Decodes quantised last layer of model output
+*
+* The constructor stores the quantisation offset and scale it is given.
+* decodeOutput() is only declared in this header; its definition lives elsewhere.
+*
+* decodeOutput() receives the int8_t model output by non-const reference and
+* returns a std::pair<int, float> - presumably the index of the best-scoring
+* label and its de-quantised score (TODO confirm against the implementation).
+*
+* NOTE(review): std::pair, std::vector and int8_t are used here although
+* <utility>, <vector> and <cstdint> are not included by this header; presumably
+* they arrive through ArmnnNetworkExecutor.hpp - confirm.
+*/
+class Decoder
+{
+private:
+ int quantisationOffset; // quantisation offset supplied at construction
+ float quantisationScale; // quantisation scale supplied at construction
+
+public:
+
+ Decoder(int quantisationOffset, float quantisationScale) : quantisationOffset(quantisationOffset),
+ quantisationScale(quantisationScale) {} // parameters deliberately share the members' names
+
+ std::pair<int, float> decodeOutput(std::vector<int8_t>& modelOutput);
+
+};
+} // namespace kws \ No newline at end of file
diff --git a/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp b/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp
new file mode 100644
index 0000000000..b635d1a41e
--- /dev/null
+++ b/samples/KeywordSpotting/include/DsCNNPreprocessor.hpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#ifndef KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP
+#define KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP
+
+#include <numeric>
+#include "DsCnnMfcc.hpp"
+
+namespace kws
+{
+class DsCNNPreprocessor
+{
+public:
+ DsCNNPreprocessor(uint32_t windowLen, uint32_t windowStride,
+ std::unique_ptr<DsCnnMFCC> mfccInst); // takes ownership of the MFCC instance
+
+ /**
+ * @brief Calculates the features required from audio data. This
+ * includes MFCC, first and second order deltas,
+ * normalisation and finally, quantisation. The tensor is
+ * populated with feature from a given window placed along
+ * in a single row.
+ * NOTE(review): the processing steps listed above cannot be
+ * verified from this header - confirm against the
+ * implementation.
+ * @param[in] audioData pointer to the first element of audio data
+ * @param[in] dataSize presumably the number of elements readable
+ * through audioData - TODO confirm
+ * @param[in] quantOffset quantisation offset; presumably that of the
+ * model input tensor - TODO confirm
+ * @param[in] quantScale quantisation scale; presumably that of the
+ * model input tensor - TODO confirm
+ * @return vector of int8_t values, presumably the quantised
+ * features - TODO confirm. No success/failure flag is
+ * returned.
+ */
+ std::vector<int8_t> Invoke(const float* audioData,
+ size_t dataSize,
+ int quantOffset,
+ float quantScale) ;
+
+ uint32_t m_windowLen; // Window length for MFCC
+ uint32_t m_windowStride; // Window stride len for MFCC
+ std::unique_ptr<MFCC> m_mfcc; // held as the MFCC base type, although the constructor accepts a DsCnnMFCC
+};
+} // namespace kws
+#endif //KEYWORD_SPOTTING_EXAMPLE_DSCNNPREPROCESSOR_HPP
diff --git a/samples/KeywordSpotting/include/DsCnnMfcc.hpp b/samples/KeywordSpotting/include/DsCnnMfcc.hpp
new file mode 100644
index 0000000000..851e010b22
--- /dev/null
+++ b/samples/KeywordSpotting/include/DsCnnMfcc.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "MFCC.hpp"
+
+/* Class to provide DS-CNN specific MFCC calculation requirements.
+ *
+ * As declared here the class derives publicly from MFCC and adds no members
+ * or overrides of its own: it can only be built from an MfccParams object,
+ * which is handed straight to the MFCC base constructor.
+ */
+class DsCnnMFCC : public MFCC
+{
+
+public:
+
+ explicit DsCnnMFCC(MfccParams& params) // params is taken by non-const reference
+ : MFCC(params) // forwarded unchanged to the base class
+ {}
+ DsCnnMFCC() = delete; // construction without parameters is not allowed
+ ~DsCnnMFCC() = default;
+};
diff --git a/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp b/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp
new file mode 100644
index 0000000000..bd47987a59
--- /dev/null
+++ b/samples/KeywordSpotting/include/KeywordSpottingPipeline.hpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ArmnnNetworkExecutor.hpp"
+#include "Decoder.hpp"
+#include "MFCC.hpp"
+#include "DsCNNPreprocessor.hpp"
+
+namespace kws
+{
+/**
+ * Generic Keyword Spotting pipeline with 3 steps: data pre-processing, inference execution and inference
+ * result post-processing.
+ *
+ * The pipeline owns its three collaborators (network executor, decoder and pre-processor) through
+ * unique pointers passed to the constructor.
+ */
+class KWSPipeline
+{
+public:
+
+ /**
+ * Creates keyword spotting pipeline with given network executor, decoder and pre-processor.
+ * @param executor - unique pointer to inference runner
+ * @param decoder - unique pointer to inference results decoder
+ * @param preProcessor - unique pointer to the DS-CNN audio pre-processor
+ */
+ KWSPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> executor,
+ std::unique_ptr<Decoder> decoder,
+ std::unique_ptr<DsCNNPreprocessor> preProcessor);
+
+ /**
+ * @brief Standard audio pre-processing implementation.
+ *
+ * Preprocesses and prepares the data for inference by
+ * extracting the MFCC features.
+ *
+ * @param[in] audio - the raw audio data
+ * @return int8_t vector; presumably the quantised features to pass to Inference() - TODO confirm
+ */
+
+ std::vector<int8_t> PreProcessing(std::vector<float>& audio);
+
+ /**
+ * @brief Executes inference
+ *
+ * Calls inference runner provided during instance construction.
+ *
+ * @param[in] preprocessedData - input inference data. Data type should be aligned with input tensor.
+ * @param[out] result - raw inference results.
+ */
+ void Inference(const std::vector<int8_t>& preprocessedData, common::InferenceResults<int8_t>& result);
+
+ /**
+ * @brief Standard inference results post-processing implementation.
+ *
+ * Decodes inference results using decoder provided during construction.
+ *
+ * @param[in] inferenceResults - inference results to be decoded.
+ * @param[in] labels - the words we use for the model
+ * @param[in] callback - function taking (int, std::string&, float); presumably invoked with the
+ * decoded label index, the label text and its score - TODO confirm
+ */
+ void PostProcessing(common::InferenceResults<int8_t>& inferenceResults,
+ std::map<int, std::string>& labels,
+ const std::function<void (int, std::string&, float)>& callback);
+
+ /**
+ * @brief Get the number of samples for the pipeline input
+ *
+ * @return - number of samples for the pipeline
+ */
+ int getInputSamplesSize();
+
+protected:
+ std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> m_executor; // inference runner
+ std::unique_ptr<Decoder> m_decoder; // inference results decoder
+ std::unique_ptr<DsCNNPreprocessor> m_preProcessor; // audio pre-processor
+};
+
+using IPipelinePtr = std::unique_ptr<kws::KWSPipeline>; // owning pointer to a KWSPipeline
+
+/**
+ * Constructs keyword spotting pipeline based on configuration provided.
+ *
+ * @param[in] config - keyword spotting pipeline configuration.
+ *
+ * @return unique pointer to the keyword spotting pipeline.
+ */
+IPipelinePtr CreatePipeline(common::PipelineOptions& config);
+
+};// namespace kws \ No newline at end of file