diff options
Diffstat (limited to 'samples/KeywordSpotting/src')
-rw-r--r-- | samples/KeywordSpotting/src/Decoder.cpp | 35 | ||||
-rw-r--r-- | samples/KeywordSpotting/src/DsCNNPreprocessor.cpp | 40 | ||||
-rw-r--r-- | samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp | 94 | ||||
-rw-r--r-- | samples/KeywordSpotting/src/Main.cpp | 128 |
4 files changed, 297 insertions, 0 deletions
diff --git a/samples/KeywordSpotting/src/Decoder.cpp b/samples/KeywordSpotting/src/Decoder.cpp new file mode 100644 index 0000000000..107e25caa9 --- /dev/null +++ b/samples/KeywordSpotting/src/Decoder.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Decoder.hpp" + +std::pair<int, float> kws::Decoder::decodeOutput(std::vector<int8_t>& modelOutput) +{ + + std::vector<float> dequantisedOutput; + //Normalise vector values into new vector + for (auto& value : modelOutput) + { + float normalisedModelOutput = this->quantisationScale * (static_cast<float >(value) - + static_cast<float >(this->quantisationOffset)); + dequantisedOutput.push_back(normalisedModelOutput); + } + + //Get largest value in modelOutput + const std::vector<float>::iterator& maxElementIterator = std::max_element(dequantisedOutput.begin(), + dequantisedOutput.end()); + //Find the labelMapIndex of the largest value which corresponds to a key in a label map + int labelMapIndex = static_cast<int>(std::distance(dequantisedOutput.begin(), maxElementIterator)); + + //Round to two DP + float maxModelOutputProbability = std::roundf((*maxElementIterator) * 100) / 100; + + return std::make_pair(labelMapIndex, maxModelOutputProbability); + +} + + + + diff --git a/samples/KeywordSpotting/src/DsCNNPreprocessor.cpp b/samples/KeywordSpotting/src/DsCNNPreprocessor.cpp new file mode 100644 index 0000000000..8215feeeb5 --- /dev/null +++ b/samples/KeywordSpotting/src/DsCNNPreprocessor.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include <cmath> +#include <numeric> +#include <algorithm> +#include <memory> +#include "MathUtils.hpp" +#include "SlidingWindow.hpp" +#include "DsCNNPreprocessor.hpp" + +std::vector<int8_t> kws::DsCNNPreprocessor::Invoke(const float* audioData, size_t dataSize, + int quantOffset, float quantScale) +{ + auto window = SlidingWindow<const float>( + audioData, dataSize, + this->m_windowLen, this->m_windowStride); + + uint32_t mfccBufIdx = 0; + std::vector<int8_t> outputBuffer; + // While we can slide over the window + while (window.HasNext()) + { + const float* mfccWindow = window.Next(); + auto mfccAudioData = std::vector<float>(mfccWindow, mfccWindow + this->m_windowLen); + + auto mfcc = this->m_mfcc->MfccComputeQuant<int8_t>(mfccAudioData, quantScale, quantOffset); + + std::copy(mfcc.begin(), mfcc.end(), std::back_inserter(outputBuffer)); + + ++mfccBufIdx; + } + + return outputBuffer; +} + +kws::DsCNNPreprocessor::DsCNNPreprocessor(const uint32_t windowLen, const uint32_t windowStride, + std::unique_ptr<DsCnnMFCC> mfccInst) : + m_windowLen{windowLen}, m_windowStride{windowStride}, m_mfcc{std::move(mfccInst)} {} diff --git a/samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp b/samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp new file mode 100644 index 0000000000..e32d9476e3 --- /dev/null +++ b/samples/KeywordSpotting/src/KeywordSpottingPipeline.cpp @@ -0,0 +1,94 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "KeywordSpottingPipeline.hpp" +#include "ArmnnNetworkExecutor.hpp" +#include "DsCNNPreprocessor.hpp" + +namespace kws +{ +KWSPipeline::KWSPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> executor, + std::unique_ptr<Decoder> decoder, + std::unique_ptr<DsCNNPreprocessor> preProcessor + ) : + m_executor(std::move(executor)), + m_decoder(std::move(decoder)), + m_preProcessor(std::move(preProcessor)) {} + + +std::vector<int8_t> KWSPipeline::PreProcessing(std::vector<float>& audio) +{ + return m_preProcessor->Invoke(audio.data(), audio.size(), m_executor->GetQuantizationOffset(), + m_executor->GetQuantizationScale()); +} + +void KWSPipeline::Inference(const std::vector<int8_t>& preprocessedData, + common::InferenceResults<int8_t>& result) +{ + m_executor->Run(preprocessedData.data(), preprocessedData.size(), result); +} + +void KWSPipeline::PostProcessing(common::InferenceResults<int8_t>& inferenceResults, + std::map<int, std::string>& labels, + const std::function<void (int, std::string&, float)>& callback) +{ + std::pair<int,float> outputDecoder = this->m_decoder->decodeOutput(inferenceResults[0]); + int keywordIndex = std::get<0>(outputDecoder); + std::string output = labels[keywordIndex]; + callback(keywordIndex, output, std::get<1>(outputDecoder)); +} + +int KWSPipeline::getInputSamplesSize() +{ + return this->m_preProcessor->m_windowLen + + ((this->m_preProcessor->m_mfcc->m_params.m_numMfccVectors - 1) * + this->m_preProcessor->m_windowStride); +} + +IPipelinePtr CreatePipeline(common::PipelineOptions& config) +{ + if (config.m_ModelName == "DS_CNN_CLUSTERED_INT8") + { + //DS-CNN model settings + float SAMP_FREQ = 16000; + int MFCC_WINDOW_LEN = 640; + int MFCC_WINDOW_STRIDE = 320; + int NUM_MFCC_FEATS = 10; + int NUM_MFCC_VECTORS = 49; + //todo: calc in pipeline and use in main + int SAMPLES_PER_INFERENCE = NUM_MFCC_VECTORS * MFCC_WINDOW_STRIDE + + MFCC_WINDOW_LEN - MFCC_WINDOW_STRIDE; //16000 + float MEL_LO_FREQ = 20; + float MEL_HI_FREQ = 4000; + int NUM_FBANK_BIN = 40; + + MfccParams mfccParams(SAMP_FREQ, + NUM_FBANK_BIN, + MEL_LO_FREQ, + MEL_HI_FREQ, + NUM_MFCC_FEATS, + MFCC_WINDOW_LEN, false, + NUM_MFCC_VECTORS); + + std::unique_ptr<DsCnnMFCC> mfccInst = std::make_unique<DsCnnMFCC>(mfccParams); + auto preprocessor = std::make_unique<kws::DsCNNPreprocessor>( + MFCC_WINDOW_LEN, MFCC_WINDOW_STRIDE, std::move(mfccInst)); + + auto executor = std::make_unique<common::ArmnnNetworkExecutor<int8_t>>( + config.m_ModelFilePath, config.m_backends); + + auto decoder = std::make_unique<kws::Decoder>(executor->GetOutputQuantizationOffset(0), + executor->GetOutputQuantizationScale(0)); + + return std::make_unique<kws::KWSPipeline>(std::move(executor), + std::move(decoder), std::move(preprocessor)); + } + else + { + throw std::invalid_argument("Unknown Model name: " + config.m_ModelName + " ."); + } +} + +};// namespace kws
\ No newline at end of file diff --git a/samples/KeywordSpotting/src/Main.cpp b/samples/KeywordSpotting/src/Main.cpp new file mode 100644 index 0000000000..10efcd8ce7 --- /dev/null +++ b/samples/KeywordSpotting/src/Main.cpp @@ -0,0 +1,128 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include <iostream> +#include <map> +#include <vector> +#include <algorithm> +#include <cmath> +#include "KeywordSpottingPipeline.hpp" +#include "CmdArgsParser.hpp" +#include "ArmnnNetworkExecutor.hpp" +#include "AudioCapture.hpp" + +const std::string AUDIO_FILE_PATH = "--audio-file-path"; +const std::string MODEL_FILE_PATH = "--model-file-path"; +const std::string LABEL_PATH = "--label-path"; +const std::string PREFERRED_BACKENDS = "--preferred-backends"; +const std::string HELP = "--help"; + +/* + * The accepted options for this Speech Recognition executable + */ +static std::map<std::string, std::string> CMD_OPTIONS = +{ + {AUDIO_FILE_PATH, "[REQUIRED] Path to the Audio file to run speech recognition on"}, + {MODEL_FILE_PATH, "[REQUIRED] Path to the Speech Recognition model to use"}, + {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma." + " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]." + " Defaults to CpuAcc,CpuRef"} +}; + +/* + * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector + */ +std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends) +{ + std::vector<armnn::BackendId> backends; + std::stringstream ss(preferredBackends); + + while (ss.good()) + { + std::string backend; + std::getline(ss, backend, ','); + backends.emplace_back(backend); + } + return backends; +} + +//Labels for this model +std::map<int, std::string> labels = +{ + {0, "silence"}, + {1, "unknown"}, + {2, "yes"}, + {3, "no"}, + {4, "up"}, + {5, "down"}, + {6, "left"}, + {7, "right"}, + {8, "on"}, + {9, "off"}, + {10, "stop"}, + {11, "go"} +}; + + +int main(int argc, char* argv[]) +{ + printf("ArmNN major version: %d\n", ARMNN_MAJOR_VERSION); + std::map<std::string, std::string> options; + + //Read command line args + int result = ParseOptions(options, CMD_OPTIONS, argv, argc); + if (result != 0) + { + return result; + } + + // Create the ArmNN inference runner + common::PipelineOptions pipelineOptions; + pipelineOptions.m_ModelName = "DS_CNN_CLUSTERED_INT8"; + pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH); + if (CheckOptionSpecified(options, PREFERRED_BACKENDS)) + { + pipelineOptions.m_backends = GetPreferredBackendList( + (GetSpecifiedOption(options, PREFERRED_BACKENDS))); + } + else + { + pipelineOptions.m_backends = {"CpuAcc", "CpuRef"}; + } + + kws::IPipelinePtr kwsPipeline = kws::CreatePipeline(pipelineOptions); + + //Extract audio data from sound file + auto filePath = GetSpecifiedOption(options, AUDIO_FILE_PATH); + std::vector<float> audioData = audio::AudioCapture::LoadAudioFile(filePath); + + audio::AudioCapture capture; + //todo: read samples and stride from pipeline + capture.InitSlidingWindow(audioData.data(), + audioData.size(), + kwsPipeline->getInputSamplesSize(), + kwsPipeline->getInputSamplesSize()/2); + + //Loop through audio data buffer + while (capture.HasNext()) + { + std::vector<float> audioBlock = capture.Next(); + common::InferenceResults<int8_t> results; + + //Prepare input tensors + std::vector<int8_t> preprocessedData = kwsPipeline->PreProcessing(audioBlock); + //Run inference + kwsPipeline->Inference(preprocessedData, results); + //Decode output + kwsPipeline->PostProcessing(results, labels, + [](int index, std::string& label, float prob) -> void { + printf("Keyword \"%s\", index %d:, probability %f\n", + label.c_str(), + index, + prob); + }); + } + + return 0; +}
\ No newline at end of file |