// // Copyright © 2021 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include #include #include #include #include #include "KeywordSpottingPipeline.hpp" #include "CmdArgsParser.hpp" #include "ArmnnNetworkExecutor.hpp" #include "AudioCapture.hpp" const std::string AUDIO_FILE_PATH = "--audio-file-path"; const std::string MODEL_FILE_PATH = "--model-file-path"; const std::string LABEL_PATH = "--label-path"; const std::string PREFERRED_BACKENDS = "--preferred-backends"; const std::string HELP = "--help"; /* * The accepted options for this Speech Recognition executable */ static std::map CMD_OPTIONS = { {AUDIO_FILE_PATH, "[REQUIRED] Path to the Audio file to run speech recognition on"}, {MODEL_FILE_PATH, "[REQUIRED] Path to the Speech Recognition model to use"}, {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma." " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]." " Defaults to CpuAcc,CpuRef"} }; /* * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector */ std::vector GetPreferredBackendList(const std::string& preferredBackends) { std::vector backends; std::stringstream ss(preferredBackends); while (ss.good()) { std::string backend; std::getline(ss, backend, ','); backends.emplace_back(backend); } return backends; } //Labels for this model std::map labels = { {0, "silence"}, {1, "unknown"}, {2, "yes"}, {3, "no"}, {4, "up"}, {5, "down"}, {6, "left"}, {7, "right"}, {8, "on"}, {9, "off"}, {10, "stop"}, {11, "go"} }; int main(int argc, char* argv[]) { printf("ArmNN major version: %d\n", ARMNN_MAJOR_VERSION); std::map options; //Read command line args int result = ParseOptions(options, CMD_OPTIONS, argv, argc); if (result != 0) { return result; } // Create the ArmNN inference runner common::PipelineOptions pipelineOptions; pipelineOptions.m_ModelName = "DS_CNN_CLUSTERED_INT8"; pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH); if (CheckOptionSpecified(options, PREFERRED_BACKENDS)) { pipelineOptions.m_backends = GetPreferredBackendList( (GetSpecifiedOption(options, PREFERRED_BACKENDS))); } else { pipelineOptions.m_backends = {"CpuAcc", "CpuRef"}; } kws::IPipelinePtr kwsPipeline = kws::CreatePipeline(pipelineOptions); //Extract audio data from sound file auto filePath = GetSpecifiedOption(options, AUDIO_FILE_PATH); std::vector audioData = audio::AudioCapture::LoadAudioFile(filePath); audio::AudioCapture capture; //todo: read samples and stride from pipeline capture.InitSlidingWindow(audioData.data(), audioData.size(), kwsPipeline->getInputSamplesSize(), kwsPipeline->getInputSamplesSize()/2); //Loop through audio data buffer while (capture.HasNext()) { std::vector audioBlock = capture.Next(); common::InferenceResults results; //Prepare input tensors std::vector preprocessedData = kwsPipeline->PreProcessing(audioBlock); //Run inference kwsPipeline->Inference(preprocessedData, results); //Decode output kwsPipeline->PostProcessing(results, labels, [](int index, std::string& label, float prob) -> void { printf("Keyword \"%s\", index %d:, probability %f\n", label.c_str(), index, prob); }); } return 0; }