diff options
Diffstat (limited to 'source/use_case/asr/src/MainLoop.cc')
-rw-r--r-- | source/use_case/asr/src/MainLoop.cc | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc new file mode 100644 index 0000000..ca777be --- /dev/null +++ b/source/use_case/asr/src/MainLoop.cc @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2021 Arm Limited. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "hal.h" /* Brings in platform definitions. */ +#include "Labels.hpp" /* For label strings. */ +#include "UseCaseHandler.hpp" /* Handlers for different user options. */ +#include "Wav2LetterModel.hpp" /* Model class for running inference. */ +#include "UseCaseCommonUtils.hpp" /* Utils functions. */ +#include "AsrClassifier.hpp" /* Classifier. */ +#include "InputFiles.hpp" /* Generated audio clip header. */ +#include "Wav2LetterPreprocess.hpp" /* Pre-processing class. */ +#include "Wav2LetterPostprocess.hpp" /* Post-processing class. */ + +enum opcodes +{ + MENU_OPT_RUN_INF_NEXT = 1, /* Run on next vector. */ + MENU_OPT_RUN_INF_CHOSEN, /* Run on a user provided vector index. */ + MENU_OPT_RUN_INF_ALL, /* Run inference on all. */ + MENU_OPT_SHOW_MODEL_INFO, /* Show model info. */ + MENU_OPT_LIST_AUDIO_CLIPS /* List the current baked audio clips. */ +}; + +static void DisplayMenu() +{ + printf("\n\nUser input required\n"); + printf("Enter option number from:\n\n"); + printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT); + printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN); + printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL); + printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO); + printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS); + printf(" Choice: "); +} + +/** @brief Verify input and output tensor are of certain min dimensions. */ +static bool VerifyTensorDimensions(const arm::app::Model& model); + +/** @brief Gets the number of MFCC features for a single window. */ +static uint32_t GetNumMfccFeatures(const arm::app::Model& model); + +/** @brief Gets the number of MFCC feature vectors to be computed. */ +static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model); + +/** @brief Gets the output context length (left and right) for post-processing. */ +static uint32_t GetOutputContextLen(const arm::app::Model& model, + uint32_t inputCtxLen); + +/** @brief Gets the output inner length for post-processing. */ +static uint32_t GetOutputInnerLen(const arm::app::Model& model, + uint32_t outputCtxLen); + +void main_loop(hal_platform& platform) +{ + arm::app::Wav2LetterModel model; /* Model wrapper object. */ + + /* Load the model. */ + if (!model.Init()) { + printf_err("Failed to initialise model\n"); + return; + } else if (!VerifyTensorDimensions(model)) { + printf_err("Model's input or output dimension verification failed\n"); + return; + } + + /* Initialise pre-processing. */ + arm::app::audio::asr::Preprocess prep( + GetNumMfccFeatures(model), + g_FrameLength, + g_FrameStride, + GetNumMfccFeatureVectors(model)); + + /* Initialise post-processing. */ + const uint32_t outputCtxLen = GetOutputContextLen(model, g_ctxLen); + const uint32_t blankTokenIdx = 28; + arm::app::audio::asr::Postprocess postp( + outputCtxLen, + GetOutputInnerLen(model, outputCtxLen), + blankTokenIdx); + + /* Instantiate application context. */ + arm::app::ApplicationContext caseContext; + std::vector <std::string> labels; + GetLabelsVector(labels); + arm::app::AsrClassifier classifier; /* Classifier wrapper object. */ + + caseContext.Set<hal_platform&>("platform", platform); + caseContext.Set<arm::app::Model&>("model", model); + caseContext.Set<uint32_t>("clipIndex", 0); + caseContext.Set<uint32_t>("frameLength", g_FrameLength); + caseContext.Set<uint32_t>("frameStride", g_FrameStride); + caseContext.Set<float>("scoreThreshold", g_ScoreThreshold); /* Score threshold. */ + caseContext.Set<uint32_t>("ctxLen", g_ctxLen); /* Left and right context length (MFCC feat vectors). */ + caseContext.Set<const std::vector <std::string>&>("labels", labels); + caseContext.Set<arm::app::AsrClassifier&>("classifier", classifier); + caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep); + caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp); + + bool executionSuccessful = true; + constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false; + + /* Loop. */ + do { + int menuOption = MENU_OPT_RUN_INF_NEXT; + if (bUseMenu) { + DisplayMenu(); + menuOption = arm::app::ReadUserInputAsInt(platform); + printf("\n"); + } + switch (menuOption) { + case MENU_OPT_RUN_INF_NEXT: + executionSuccessful = ClassifyAudioHandler( + caseContext, + caseContext.Get<uint32_t>("clipIndex"), + false); + break; + case MENU_OPT_RUN_INF_CHOSEN: { + printf(" Enter the audio clip index [0, %d]: ", + NUMBER_OF_FILES-1); + auto clipIndex = static_cast<uint32_t>( + arm::app::ReadUserInputAsInt(platform)); + executionSuccessful = ClassifyAudioHandler(caseContext, + clipIndex, + false); + break; + } + case MENU_OPT_RUN_INF_ALL: + executionSuccessful = ClassifyAudioHandler( + caseContext, + caseContext.Get<uint32_t>("clipIndex"), + true); + break; + case MENU_OPT_SHOW_MODEL_INFO: + executionSuccessful = model.ShowModelInfoHandler(); + break; + case MENU_OPT_LIST_AUDIO_CLIPS: + executionSuccessful = ListFilesHandler(caseContext); + break; + default: + printf("Incorrect choice, try again."); + break; + } + } while (executionSuccessful && bUseMenu); + info("Main loop terminated.\n"); +} + +static bool VerifyTensorDimensions(const arm::app::Model& model) +{ + /* Populate tensor related parameters. */ + TfLiteTensor* inputTensor = model.GetInputTensor(0); + if (!inputTensor->dims) { + printf_err("Invalid input tensor dims\n"); + return false; + } else if (inputTensor->dims->size < 3) { + printf_err("Input tensor dimension should be >= 3\n"); + return false; + } + + TfLiteTensor* outputTensor = model.GetOutputTensor(0); + if (!outputTensor->dims) { + printf_err("Invalid output tensor dims\n"); + return false; + } else if (outputTensor->dims->size < 3) { + printf_err("Output tensor dimension should be >= 3\n"); + return false; + } + + return true; +} + +static uint32_t GetNumMfccFeatures(const arm::app::Model& model) +{ + TfLiteTensor* inputTensor = model.GetInputTensor(0); + const int inputCols = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx]; + if (0 != inputCols % 3) { + printf_err("Number of input columns is not a multiple of 3\n"); + } + return std::max(inputCols/3, 0); +} + +static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model) +{ + TfLiteTensor* inputTensor = model.GetInputTensor(0); + const int inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx]; + return std::max(inputRows, 0); +} + +static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen) +{ + const uint32_t inputRows = GetNumMfccFeatureVectors(model); + const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen); + constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx; + + /* Check to make sure that the input tensor supports the above + * context and inner lengths. */ + if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) { + printf_err("Input rows not compatible with ctx of %u\n", + inputCtxLen); + return 0; + } + + TfLiteTensor* outputTensor = model.GetOutputTensor(0); + const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0); + + const float tensorColRatio = static_cast<float>(inputRows)/ + static_cast<float>(outputRows); + + return std::round(static_cast<float>(inputCtxLen)/tensorColRatio); +} + +static uint32_t GetOutputInnerLen(const arm::app::Model& model, + const uint32_t outputCtxLen) +{ + constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx; + TfLiteTensor* outputTensor = model.GetOutputTensor(0); + const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0); + return (outputRows - (2 * outputCtxLen)); +} |