aboutsummaryrefslogtreecommitdiff
path: root/samples/KeywordSpotting/src
diff options
context:
space:
mode:
Diffstat (limited to 'samples/KeywordSpotting/src')
-rw-r--r-- samples/KeywordSpotting/src/Decoder.cpp | 35
-rw-r--r-- samples/KeywordSpotting/src/DsCNNPreprocessor.cpp | 40
-rw-r--r-- samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp | 94
-rw-r--r-- samples/KeywordSpotting/src/Main.cpp | 128
4 files changed, 297 insertions, 0 deletions
diff --git a/samples/KeywordSpotting/src/Decoder.cpp b/samples/KeywordSpotting/src/Decoder.cpp
new file mode 100644
index 0000000000..107e25caa9
--- /dev/null
+++ b/samples/KeywordSpotting/src/Decoder.cpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Decoder.hpp"
+
+#include <cmath>
+#include <cstdint>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+/**
+ * Dequantises the raw int8 model output and selects the highest-scoring element.
+ *
+ * Every element is mapped to quantisationScale * (value - quantisationOffset). The element
+ * with the largest dequantised value wins; on ties the first occurrence is chosen.
+ *
+ * @param modelOutput quantised output values produced by the model; must not be empty.
+ * @return pair of (index of the winning element, its dequantised value rounded to two
+ *         decimal places).
+ * @throws std::invalid_argument if modelOutput is empty.
+ */
+std::pair<int, float> kws::Decoder::decodeOutput(std::vector<int8_t>& modelOutput)
+{
+    if (modelOutput.empty())
+    {
+        // There is no largest element to report; searching an empty range and dereferencing
+        // the result would be undefined behaviour.
+        throw std::invalid_argument("kws::Decoder::decodeOutput: model output is empty");
+    }
+
+    const auto scale = this->quantisationScale;
+    const float offset = static_cast<float>(this->quantisationOffset);
+    const auto dequantise = [scale, offset](int8_t value) -> float
+    {
+        return scale * (static_cast<float>(value) - offset);
+    };
+
+    // Single pass over the output: track the largest dequantised value and where it was seen.
+    using Index = std::vector<int8_t>::size_type;
+    Index bestIndex = 0;
+    float bestValue = dequantise(modelOutput[0]);
+    for (Index i = 1; i < modelOutput.size(); ++i)
+    {
+        const float candidate = dequantise(modelOutput[i]);
+        // Strict comparison keeps the first occurrence when several elements share the maximum.
+        if (bestValue < candidate)
+        {
+            bestValue = candidate;
+            bestIndex = i;
+        }
+    }
+
+    //Round to two DP
+    const float maxModelOutputProbability = std::roundf(bestValue * 100) / 100;
+
+    return std::make_pair(static_cast<int>(bestIndex), maxModelOutputProbability);
+}
+
+
+
+
diff --git a/samples/KeywordSpotting/src/DsCNNPreprocessor.cpp b/samples/KeywordSpotting/src/DsCNNPreprocessor.cpp
new file mode 100644
index 0000000000..8215feeeb5
--- /dev/null
+++ b/samples/KeywordSpotting/src/DsCNNPreprocessor.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include <cmath>
+#include <numeric>
+#include <algorithm>
+#include <memory>
+#include "MathUtils.hpp"
+#include "SlidingWindow.hpp"
+#include "DsCNNPreprocessor.hpp"
+
+/**
+ * Slides a window over the audio samples and computes quantised MFCC features for every
+ * window position.
+ *
+ * @param audioData   pointer to the audio samples to process.
+ * @param dataSize    number of samples available at audioData.
+ * @param quantOffset quantisation offset forwarded to the MFCC computation.
+ * @param quantScale  quantisation scale forwarded to the MFCC computation.
+ * @return the quantised MFCC features of all windows, concatenated in window order.
+ */
+std::vector<int8_t> kws::DsCNNPreprocessor::Invoke(const float* audioData, size_t dataSize,
+                                                   int quantOffset, float quantScale)
+{
+    auto window = SlidingWindow<const float>(
+        audioData, dataSize,
+        this->m_windowLen, this->m_windowStride);
+
+    std::vector<int8_t> outputBuffer;
+    // Scratch buffer reused for every window so each iteration does not allocate a new vector.
+    std::vector<float> mfccAudioData;
+
+    // While we can slide over the window
+    while (window.HasNext())
+    {
+        const float* mfccWindow = window.Next();
+        mfccAudioData.assign(mfccWindow, mfccWindow + this->m_windowLen);
+
+        auto mfcc = this->m_mfcc->MfccComputeQuant<int8_t>(mfccAudioData, quantScale, quantOffset);
+
+        // Append this window's features after those of the previous windows.
+        outputBuffer.insert(outputBuffer.end(), mfcc.begin(), mfcc.end());
+    }
+
+    return outputBuffer;
+}
+
+// Stores the window length and stride and moves the supplied MFCC calculator into m_mfcc.
+kws::DsCNNPreprocessor::DsCNNPreprocessor(const uint32_t windowLen,
+                                          const uint32_t windowStride,
+                                          std::unique_ptr<DsCnnMFCC> mfccInst)
+    : m_windowLen{windowLen}
+    , m_windowStride{windowStride}
+    , m_mfcc{std::move(mfccInst)}
+{
+}
diff --git a/samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp b/samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp
new file mode 100644
index 0000000000..e32d9476e3
--- /dev/null
+++ b/samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp
@@ -0,0 +1,94 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "KeywordSpottingPipeline.hpp"
+#include "ArmnnNetworkExecutor.hpp"
+#include "DsCNNPreprocessor.hpp"
+
+namespace kws
+{
+// Assembles a pipeline from its three stages; each stage is moved into the matching member.
+KWSPipeline::KWSPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> executor,
+                         std::unique_ptr<Decoder> decoder,
+                         std::unique_ptr<DsCNNPreprocessor> preProcessor)
+    : m_executor(std::move(executor))
+    , m_decoder(std::move(decoder))
+    , m_preProcessor(std::move(preProcessor))
+{
+}
+
+
+// Converts a block of audio samples into the quantised input of the network, using the
+// quantisation offset and scale reported by the executor.
+std::vector<int8_t> KWSPipeline::PreProcessing(std::vector<float>& audio)
+{
+    const auto quantOffset = m_executor->GetQuantizationOffset();
+    const auto quantScale = m_executor->GetQuantizationScale();
+
+    return m_preProcessor->Invoke(audio.data(), audio.size(), quantOffset, quantScale);
+}
+
+// Hands the preprocessed data to the executor, which stores its output in result.
+void KWSPipeline::Inference(const std::vector<int8_t>& preprocessedData,
+                            common::InferenceResults<int8_t>& result)
+{
+    const int8_t* inputData = preprocessedData.data();
+    const auto inputSize = preprocessedData.size();
+
+    m_executor->Run(inputData, inputSize, result);
+}
+
+/**
+ * Decodes the first inference output and reports the detected keyword through the callback.
+ *
+ * @param inferenceResults results of the inference; element 0 is handed to the decoder.
+ * @param labels           maps a decoded index to its keyword label; it is only read.
+ * @param callback         invoked once with (decoded index, label, decoded score). The label
+ *                         is an empty string when labels has no entry for the index.
+ */
+void KWSPipeline::PostProcessing(common::InferenceResults<int8_t>& inferenceResults,
+                                 std::map<int, std::string>& labels,
+                                 const std::function<void (int, std::string&, float)>& callback)
+{
+    const std::pair<int, float> decoded = this->m_decoder->decodeOutput(inferenceResults[0]);
+    const int keywordIndex = decoded.first;
+
+    // Look the label up with find() rather than operator[]: operator[] would silently insert
+    // an empty entry into the caller's map whenever the decoded index has no label.
+    std::string output;
+    const auto labelIt = labels.find(keywordIndex);
+    if (labelIt != labels.end())
+    {
+        output = labelIt->second;
+    }
+
+    callback(keywordIndex, output, decoded.second);
+}
+
+// Number of audio samples consumed by one inference: one full window plus one stride for
+// each of the remaining MFCC vectors.
+int KWSPipeline::getInputSamplesSize()
+{
+    const auto& preProcessor = *this->m_preProcessor;
+
+    const auto remainingVectors = preProcessor.m_mfcc->m_params.m_numMfccVectors - 1;
+    const auto stridedSamples = remainingVectors * preProcessor.m_windowStride;
+
+    return preProcessor.m_windowLen + stridedSamples;
+}
+
+/**
+ * Builds the keyword spotting pipeline for the model named in config.
+ *
+ * Only "DS_CNN_CLUSTERED_INT8" is supported. For that model the MFCC preprocessor, the
+ * network executor and the decoder are created and combined into a KWSPipeline.
+ *
+ * @param config pipeline options; m_ModelName selects the model, m_ModelFilePath and
+ *               m_backends are forwarded to the network executor.
+ * @return the assembled pipeline.
+ * @throws std::invalid_argument if config.m_ModelName is not a supported model.
+ */
+IPipelinePtr CreatePipeline(common::PipelineOptions& config)
+{
+    if (config.m_ModelName != "DS_CNN_CLUSTERED_INT8")
+    {
+        throw std::invalid_argument("Unknown Model name: " + config.m_ModelName + " .");
+    }
+
+    //DS-CNN model settings
+    constexpr float SAMP_FREQ = 16000;
+    constexpr int MFCC_WINDOW_LEN = 640;
+    constexpr int MFCC_WINDOW_STRIDE = 320;
+    constexpr int NUM_MFCC_FEATS = 10;
+    constexpr int NUM_MFCC_VECTORS = 49;
+    constexpr float MEL_LO_FREQ = 20;
+    constexpr float MEL_HI_FREQ = 4000;
+    constexpr int NUM_FBANK_BIN = 40;
+
+    MfccParams mfccParams(SAMP_FREQ,
+                          NUM_FBANK_BIN,
+                          MEL_LO_FREQ,
+                          MEL_HI_FREQ,
+                          NUM_MFCC_FEATS,
+                          MFCC_WINDOW_LEN, false,
+                          NUM_MFCC_VECTORS);
+
+    std::unique_ptr<DsCnnMFCC> mfccInst = std::make_unique<DsCnnMFCC>(mfccParams);
+    auto preprocessor = std::make_unique<kws::DsCNNPreprocessor>(
+        MFCC_WINDOW_LEN, MFCC_WINDOW_STRIDE, std::move(mfccInst));
+
+    auto executor = std::make_unique<common::ArmnnNetworkExecutor<int8_t>>(
+        config.m_ModelFilePath, config.m_backends);
+
+    // The decoder dequantises the network output, so it is given the quantisation
+    // parameters of output 0.
+    auto decoder = std::make_unique<kws::Decoder>(executor->GetOutputQuantizationOffset(0),
+                                                  executor->GetOutputQuantizationScale(0));
+
+    return std::make_unique<kws::KWSPipeline>(std::move(executor),
+                                              std::move(decoder), std::move(preprocessor));
+}
+
+};// namespace kws
\ No newline at end of file
diff --git a/samples/KeywordSpotting/src/Main.cpp b/samples/KeywordSpotting/src/Main.cpp
new file mode 100644
index 0000000000..10efcd8ce7
--- /dev/null
+++ b/samples/KeywordSpotting/src/Main.cpp
@@ -0,0 +1,128 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include <iostream>
+#include <map>
+#include <vector>
+#include <algorithm>
+#include <cmath>
+#include "KeywordSpottingPipeline.hpp"
+#include "CmdArgsParser.hpp"
+#include "ArmnnNetworkExecutor.hpp"
+#include "AudioCapture.hpp"
+
+// Names of the command line options understood by this executable.
+const std::string AUDIO_FILE_PATH{"--audio-file-path"};
+const std::string MODEL_FILE_PATH{"--model-file-path"};
+const std::string LABEL_PATH{"--label-path"};
+const std::string PREFERRED_BACKENDS{"--preferred-backends"};
+const std::string HELP{"--help"};
+
+/*
+ * The accepted options for this Keyword Spotting executable, each mapped to the
+ * description that is shown to the user.
+ */
+static std::map<std::string, std::string> CMD_OPTIONS =
+{
+    {AUDIO_FILE_PATH, "[REQUIRED] Path to the Audio file to run keyword spotting on"},
+    {MODEL_FILE_PATH, "[REQUIRED] Path to the Keyword Spotting model to use"},
+    {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma."
+                         " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]."
+                         " Defaults to CpuAcc,CpuRef"}
+};
+
+/*
+ * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector.
+ *
+ * Blanks (spaces and tabs) around a backend name are removed. Empty entries, such as the one
+ * produced by a trailing comma or by an empty input string, are skipped so that no backend
+ * with an empty name is returned.
+ */
+std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends)
+{
+    const char* const blanks = " \t";
+    std::vector<armnn::BackendId> backends;
+
+    std::string::size_type tokenStart = 0;
+    while (tokenStart <= preferredBackends.size())
+    {
+        // The current token ends at the next comma or, for the last token, at the end of input.
+        std::string::size_type tokenEnd = preferredBackends.find(',', tokenStart);
+        if (tokenEnd == std::string::npos)
+        {
+            tokenEnd = preferredBackends.size();
+        }
+
+        // A token is kept only if it contains at least one non-blank character.
+        const std::string::size_type first = preferredBackends.find_first_not_of(blanks, tokenStart);
+        if (first != std::string::npos && first < tokenEnd)
+        {
+            const std::string::size_type last = preferredBackends.find_last_not_of(blanks, tokenEnd - 1);
+            backends.emplace_back(preferredBackends.substr(first, last - first + 1));
+        }
+
+        tokenStart = tokenEnd + 1;
+    }
+    return backends;
+}
+
+// Maps each output index of the model to the keyword it stands for.
+std::map<int, std::string> labels =
+{
+    {0, "silence"}, {1, "unknown"},
+    {2, "yes"},     {3, "no"},
+    {4, "up"},      {5, "down"},
+    {6, "left"},    {7, "right"},
+    {8, "on"},      {9, "off"},
+    {10, "stop"},   {11, "go"}
+};
+
+
+/*
+ * Entry point: parses the command line, builds the keyword spotting pipeline, then runs
+ * preprocessing, inference and decoding on every block of the audio file and prints the
+ * keyword found in each block.
+ */
+int main(int argc, char* argv[])
+{
+    printf("ArmNN major version: %d\n", ARMNN_MAJOR_VERSION);
+
+    // Read command line args; a non-zero parse result becomes the exit code.
+    std::map<std::string, std::string> options;
+    const int parseStatus = ParseOptions(options, CMD_OPTIONS, argv, argc);
+    if (parseStatus != 0)
+    {
+        return parseStatus;
+    }
+
+    // Describe the pipeline to build
+    common::PipelineOptions pipelineOptions;
+    pipelineOptions.m_ModelName = "DS_CNN_CLUSTERED_INT8";
+    pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH);
+    if (!CheckOptionSpecified(options, PREFERRED_BACKENDS))
+    {
+        pipelineOptions.m_backends = {"CpuAcc", "CpuRef"};
+    }
+    else
+    {
+        pipelineOptions.m_backends = GetPreferredBackendList(
+            (GetSpecifiedOption(options, PREFERRED_BACKENDS)));
+    }
+
+    // Create the ArmNN inference runner
+    kws::IPipelinePtr kwsPipeline = kws::CreatePipeline(pipelineOptions);
+
+    // Extract audio data from sound file
+    auto filePath = GetSpecifiedOption(options, AUDIO_FILE_PATH);
+    std::vector<float> audioData = audio::AudioCapture::LoadAudioFile(filePath);
+
+    // Walk the audio in blocks of one inference each, advancing by half a block per step.
+    const int samplesPerInference = kwsPipeline->getInputSamplesSize();
+    audio::AudioCapture capture;
+    capture.InitSlidingWindow(audioData.data(),
+                              audioData.size(),
+                              samplesPerInference,
+                              samplesPerInference / 2);
+
+    // Prints the keyword decoded for one block of audio.
+    const auto reportKeyword = [](int index, std::string& label, float prob) -> void
+    {
+        printf("Keyword \"%s\", index %d:, probability %f\n",
+               label.c_str(),
+               index,
+               prob);
+    };
+
+    // Loop through audio data buffer
+    while (capture.HasNext())
+    {
+        std::vector<float> audioBlock = capture.Next();
+        common::InferenceResults<int8_t> results;
+
+        // Prepare input tensors
+        std::vector<int8_t> preprocessedData = kwsPipeline->PreProcessing(audioBlock);
+        // Run inference
+        kwsPipeline->Inference(preprocessedData, results);
+        // Decode output
+        kwsPipeline->PostProcessing(results, labels, reportKeyword);
+    }
+
+    return 0;
+}
\ No newline at end of file