diff options
Diffstat (limited to 'source/use_case/asr/src')
-rw-r--r-- | source/use_case/asr/src/AsrClassifier.cc | 39 | ||||
-rw-r--r-- | source/use_case/asr/src/UseCaseHandler.cc | 20 | ||||
-rw-r--r-- | source/use_case/asr/src/Wav2LetterMfcc.cc | 23 | ||||
-rw-r--r-- | source/use_case/asr/src/Wav2LetterPostprocess.cc | 32 | ||||
-rw-r--r-- | source/use_case/asr/src/Wav2LetterPreprocess.cc | 38 |
5 files changed, 80 insertions, 72 deletions
diff --git a/source/use_case/asr/src/AsrClassifier.cc b/source/use_case/asr/src/AsrClassifier.cc index 7377d30..df26a7f 100644 --- a/source/use_case/asr/src/AsrClassifier.cc +++ b/source/use_case/asr/src/AsrClassifier.cc @@ -21,13 +21,18 @@ #include "Wav2LetterModel.hpp" template<typename T> -bool arm::app::AsrClassifier::_GetTopResults(TfLiteTensor* tensor, - std::vector<ClassificationResult>& vecResults, - const std::vector <std::string>& labels, double scale, double zeroPoint) +bool arm::app::AsrClassifier::GetTopResults(TfLiteTensor* tensor, + std::vector<ClassificationResult>& vecResults, + const std::vector <std::string>& labels, double scale, double zeroPoint) { const uint32_t nElems = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx]; const uint32_t nLetters = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx]; + if (nLetters != labels.size()) { + printf("Output size doesn't match the labels' size\n"); + return false; + } + /* NOTE: tensor's size verification against labels should be * checked by the calling/public function. */ if (nLetters < 1) { @@ -58,12 +63,12 @@ bool arm::app::AsrClassifier::_GetTopResults(TfLiteTensor* tensor, return true; } -template bool arm::app::AsrClassifier::_GetTopResults<uint8_t>(TfLiteTensor* tensor, - std::vector<ClassificationResult>& vecResults, - const std::vector <std::string>& labels, double scale, double zeroPoint); -template bool arm::app::AsrClassifier::_GetTopResults<int8_t>(TfLiteTensor* tensor, - std::vector<ClassificationResult>& vecResults, - const std::vector <std::string>& labels, double scale, double zeroPoint); +template bool arm::app::AsrClassifier::GetTopResults<uint8_t>(TfLiteTensor* tensor, + std::vector<ClassificationResult>& vecResults, + const std::vector <std::string>& labels, double scale, double zeroPoint); +template bool arm::app::AsrClassifier::GetTopResults<int8_t>(TfLiteTensor* tensor, + std::vector<ClassificationResult>& vecResults, + const std::vector <std::string>& labels, double scale, double zeroPoint); bool arm::app::AsrClassifier::GetClassificationResults( TfLiteTensor* outputTensor, @@ -104,16 +109,16 @@ bool arm::app::AsrClassifier::GetClassificationResults( switch (outputTensor->type) { case kTfLiteUInt8: - resultState = this->_GetTopResults<uint8_t>( - outputTensor, vecResults, - labels, quantParams.scale, - quantParams.offset); + resultState = this->GetTopResults<uint8_t>( + outputTensor, vecResults, + labels, quantParams.scale, + quantParams.offset); break; case kTfLiteInt8: - resultState = this->_GetTopResults<int8_t>( - outputTensor, vecResults, - labels, quantParams.scale, - quantParams.offset); + resultState = this->GetTopResults<int8_t>( + outputTensor, vecResults, + labels, quantParams.scale, + quantParams.offset); break; default: printf_err("Tensor type %s not supported by classifier\n", diff --git a/source/use_case/asr/src/UseCaseHandler.cc b/source/use_case/asr/src/UseCaseHandler.cc index 5d3157a..efaefc2 100644 --- a/source/use_case/asr/src/UseCaseHandler.cc +++ b/source/use_case/asr/src/UseCaseHandler.cc @@ -35,7 +35,7 @@ namespace app { * @brief Helper function to increment current audio clip index. * @param[in,out] ctx Pointer to the application context object. **/ - static void _IncrementAppCtxClipIdx(ApplicationContext& ctx); + static void IncrementAppCtxClipIdx(ApplicationContext& ctx); /** * @brief Helper function to set the audio clip index. @@ -43,7 +43,7 @@ namespace app { * @param[in] idx Value to be set. * @return true if index is set, false otherwise. **/ - static bool _SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx); + static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx); /** * @brief Presents inference results using the data presentation @@ -54,7 +54,7 @@ namespace app { * otherwise, this can be passed in as 0. * @return true if successful, false otherwise. **/ - static bool _PresentInferenceResult( + static bool PresentInferenceResult( hal_platform& platform, const std::vector<arm::app::asr::AsrResult>& results); @@ -71,7 +71,7 @@ namespace app { /* If the request has a valid size, set the audio index. */ if (clipIndex < NUMBER_OF_FILES) { - if (!_SetAppCtxClipIdx(ctx, clipIndex)) { + if (!SetAppCtxClipIdx(ctx, clipIndex)) { return false; } } @@ -207,20 +207,20 @@ namespace app { ctx.Set<std::vector<arm::app::asr::AsrResult>>("results", results); - if (!_PresentInferenceResult(platform, results)) { + if (!PresentInferenceResult(platform, results)) { return false; } profiler.PrintProfilingResult(); - _IncrementAppCtxClipIdx(ctx); + IncrementAppCtxClipIdx(ctx); } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx); return true; } - static void _IncrementAppCtxClipIdx(ApplicationContext& ctx) + static void IncrementAppCtxClipIdx(ApplicationContext& ctx) { auto curAudioIdx = ctx.Get<uint32_t>("clipIndex"); @@ -232,7 +232,7 @@ namespace app { ctx.Set<uint32_t>("clipIndex", curAudioIdx); } - static bool _SetAppCtxClipIdx(ApplicationContext& ctx, const uint32_t idx) + static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx) { if (idx >= NUMBER_OF_FILES) { printf_err("Invalid idx %u (expected less than %u)\n", @@ -244,8 +244,8 @@ namespace app { return true; } - static bool _PresentInferenceResult(hal_platform& platform, - const std::vector<arm::app::asr::AsrResult>& results) + static bool PresentInferenceResult(hal_platform& platform, + const std::vector<arm::app::asr::AsrResult>& results) { constexpr uint32_t dataPsnTxtStartX1 = 20; constexpr uint32_t dataPsnTxtStartY1 = 60; diff --git a/source/use_case/asr/src/Wav2LetterMfcc.cc b/source/use_case/asr/src/Wav2LetterMfcc.cc index 92c91bc..0eb152a 100644 --- a/source/use_case/asr/src/Wav2LetterMfcc.cc +++ b/source/use_case/asr/src/Wav2LetterMfcc.cc @@ -27,8 +27,8 @@ namespace audio { bool Wav2LetterMFCC::ApplyMelFilterBank( std::vector<float>& fftVec, std::vector<std::vector<float>>& melFilterBank, - std::vector<int32_t>& filterBankFilterFirst, - std::vector<int32_t>& filterBankFilterLast, + std::vector<uint32_t>& filterBankFilterFirst, + std::vector<uint32_t>& filterBankFilterLast, std::vector<float>& melEnergies) { const size_t numBanks = melEnergies.size(); @@ -41,11 +41,14 @@ namespace audio { for (size_t bin = 0; bin < numBanks; ++bin) { auto filterBankIter = melFilterBank[bin].begin(); - float melEnergy = 1e-10; /* Avoid log of zero at later stages, same value used in librosa. */ - const int32_t firstIndex = filterBankFilterFirst[bin]; - const int32_t lastIndex = filterBankFilterLast[bin]; - - for (int32_t i = firstIndex; i <= lastIndex; ++i) { + auto end = melFilterBank[bin].end(); + /* Avoid log of zero at later stages, same value used in librosa. + * The number was used during our default wav2letter model training. */ + float melEnergy = 1e-10; + const uint32_t firstIndex = filterBankFilterFirst[bin]; + const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1); + + for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) { melEnergy += (*filterBankIter++ * fftVec[i]); } @@ -73,7 +76,7 @@ namespace audio { /* Scale the log values and get the max. */ for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin(); - iterM != melEnergies.end(); ++iterM, ++iterL) { + iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) { *iterM = *iterL * multiplier; @@ -86,8 +89,8 @@ namespace audio { /* Clamp the mel energies. */ constexpr float maxDb = 80.0; const float clampLevelLowdB = maxMelEnergy - maxDb; - for (auto iter = melEnergies.begin(); iter != melEnergies.end(); ++iter) { - *iter = std::max(*iter, clampLevelLowdB); + for (float& melEnergy : melEnergies) { + melEnergy = std::max(melEnergy, clampLevelLowdB); } } diff --git a/source/use_case/asr/src/Wav2LetterPostprocess.cc b/source/use_case/asr/src/Wav2LetterPostprocess.cc index 60ee51e..9157a6f 100644 --- a/source/use_case/asr/src/Wav2LetterPostprocess.cc +++ b/source/use_case/asr/src/Wav2LetterPostprocess.cc @@ -39,13 +39,13 @@ namespace asr { const bool lastIteration) { /* Basic checks. */ - if (!this->_IsInputValid(tensor, axisIdx)) { + if (!this->IsInputValid(tensor, axisIdx)) { return false; } /* Irrespective of tensor type, we use unsigned "byte" */ uint8_t* ptrData = tflite::GetTensorData<uint8_t>(tensor); - const uint32_t elemSz = this->_GetTensorElementSize(tensor); + const uint32_t elemSz = this->GetTensorElementSize(tensor); /* Other sanity checks. */ if (0 == elemSz) { @@ -59,13 +59,15 @@ namespace asr { /* Which axis do we need to process? */ switch (axisIdx) { case arm::app::Wav2LetterModel::ms_outputRowsIdx: - return this->_EraseSectionsRowWise(ptrData, - elemSz * tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx], - lastIteration); + return this->EraseSectionsRowWise(ptrData, + elemSz * + tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx], + lastIteration); case arm::app::Wav2LetterModel::ms_outputColsIdx: - return this->_EraseSectionsColWise(ptrData, - elemSz * tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx], - lastIteration); + return this->EraseSectionsColWise(ptrData, + elemSz * + tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx], + lastIteration); default: printf_err("Unsupported axis index: %u\n", axisIdx); } @@ -73,8 +75,8 @@ namespace asr { return false; } - bool Postprocess::_IsInputValid(TfLiteTensor* tensor, - const uint32_t axisIdx) const + bool Postprocess::IsInputValid(TfLiteTensor* tensor, + const uint32_t axisIdx) const { if (nullptr == tensor) { return false; @@ -96,17 +98,15 @@ namespace asr { return true; } - uint32_t Postprocess::_GetTensorElementSize(TfLiteTensor* tensor) + uint32_t Postprocess::GetTensorElementSize(TfLiteTensor* tensor) { switch(tensor->type) { case kTfLiteUInt8: - return 1; case kTfLiteInt8: return 1; case kTfLiteInt16: return 2; case kTfLiteInt32: - return 4; case kTfLiteFloat32: return 4; default: @@ -117,7 +117,7 @@ namespace asr { return 0; } - bool Postprocess::_EraseSectionsRowWise( + bool Postprocess::EraseSectionsRowWise( uint8_t* ptrData, const uint32_t strideSzBytes, const bool lastIteration) @@ -154,8 +154,8 @@ namespace asr { return true; } - bool Postprocess::_EraseSectionsColWise( - uint8_t* ptrData, + bool Postprocess::EraseSectionsColWise( + const uint8_t* ptrData, const uint32_t strideSzBytes, const bool lastIteration) { diff --git a/source/use_case/asr/src/Wav2LetterPreprocess.cc b/source/use_case/asr/src/Wav2LetterPreprocess.cc index e46cca3..d65ea75 100644 --- a/source/use_case/asr/src/Wav2LetterPreprocess.cc +++ b/source/use_case/asr/src/Wav2LetterPreprocess.cc @@ -88,12 +88,12 @@ namespace asr { } /* Compute first and second order deltas from MFCCs. */ - this->_ComputeDeltas(this->_m_mfccBuf, - this->_m_delta1Buf, - this->_m_delta2Buf); + Preprocess::ComputeDeltas(this->_m_mfccBuf, + this->_m_delta1Buf, + this->_m_delta2Buf); /* Normalise. */ - this->_Normalise(); + this->Normalise(); /* Quantise. */ QuantParams quantParams = GetTensorQuantParams(tensor); @@ -105,11 +105,11 @@ namespace asr { switch(tensor->type) { case kTfLiteUInt8: - return this->_Quantise<uint8_t>( + return this->Quantise<uint8_t>( tflite::GetTensorData<uint8_t>(tensor), tensor->bytes, quantParams.scale, quantParams.offset); case kTfLiteInt8: - return this->_Quantise<int8_t>( + return this->Quantise<int8_t>( tflite::GetTensorData<int8_t>(tensor), tensor->bytes, quantParams.scale, quantParams.offset); default: @@ -120,9 +120,9 @@ namespace asr { return false; } - bool Preprocess::_ComputeDeltas(Array2d<float>& mfcc, - Array2d<float>& delta1, - Array2d<float>& delta2) + bool Preprocess::ComputeDeltas(Array2d<float>& mfcc, + Array2d<float>& delta1, + Array2d<float>& delta2) { const std::vector <float> delta1Coeffs = {6.66666667e-02, 5.00000000e-02, 3.33333333e-02, @@ -175,20 +175,20 @@ namespace asr { return true; } - float Preprocess::_GetMean(Array2d<float>& vec) + float Preprocess::GetMean(Array2d<float>& vec) { return math::MathUtils::MeanF32(vec.begin(), vec.totalSize()); } - float Preprocess::_GetStdDev(Array2d<float>& vec, const float mean) + float Preprocess::GetStdDev(Array2d<float>& vec, const float mean) { return math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean); } - void Preprocess::_NormaliseVec(Array2d<float>& vec) + void Preprocess::NormaliseVec(Array2d<float>& vec) { - auto mean = Preprocess::_GetMean(vec); - auto stddev = Preprocess::_GetStdDev(vec, mean); + auto mean = Preprocess::GetMean(vec); + auto stddev = Preprocess::GetStdDev(vec, mean); debug("Mean: %f, Stddev: %f\n", mean, stddev); if (stddev == 0) { @@ -204,14 +204,14 @@ namespace asr { } } - void Preprocess::_Normalise() + void Preprocess::Normalise() { - Preprocess::_NormaliseVec(this->_m_mfccBuf); - Preprocess::_NormaliseVec(this->_m_delta1Buf); - Preprocess::_NormaliseVec(this->_m_delta2Buf); + Preprocess::NormaliseVec(this->_m_mfccBuf); + Preprocess::NormaliseVec(this->_m_delta1Buf); + Preprocess::NormaliseVec(this->_m_delta2Buf); } - float Preprocess::_GetQuantElem( + float Preprocess::GetQuantElem( const float elem, const float quantScale, const int quantOffset, |