diff options
Diffstat (limited to 'samples/SpeechRecognition/src/Preprocess.cpp')
-rw-r--r-- | samples/SpeechRecognition/src/Preprocess.cpp | 192 |
1 files changed, 0 insertions, 192 deletions
diff --git a/samples/SpeechRecognition/src/Preprocess.cpp b/samples/SpeechRecognition/src/Preprocess.cpp deleted file mode 100644 index 86279619d7..0000000000 --- a/samples/SpeechRecognition/src/Preprocess.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include <algorithm> -#include <numeric> -#include <math.h> -#include <string.h> - -#include "MathUtils.hpp" -#include "Preprocess.hpp" - -Preprocess::Preprocess( - const uint32_t windowLen, - const uint32_t windowStride, - const MFCC mfccInst): - _m_mfcc(mfccInst), - _m_mfccBuf(mfccInst._m_params.m_numMfccFeatures, mfccInst._m_params.m_numMfccVectors), - _m_delta1Buf(mfccInst._m_params.m_numMfccFeatures, mfccInst._m_params.m_numMfccVectors), - _m_delta2Buf(mfccInst._m_params.m_numMfccFeatures, mfccInst._m_params.m_numMfccVectors), - _m_windowLen(windowLen), - _m_windowStride(windowStride) -{ - if (mfccInst._m_params.m_numMfccFeatures > 0 && windowLen > 0) - { - this->_m_mfcc.Init(); - } -} - -Preprocess::~Preprocess() -{ -} - -bool Preprocess::Invoke( const float* audioData, const uint32_t audioDataLen, std::vector<int8_t>& output, - int quantOffset, float quantScale) -{ - this->_m_window = SlidingWindow<const float>( - audioData, audioDataLen, - this->_m_windowLen, this->_m_windowStride); - - uint32_t mfccBufIdx = 0; - - // Init buffers with 0 - std::fill(_m_mfccBuf.begin(), _m_mfccBuf.end(), 0.f); - std::fill(_m_delta1Buf.begin(), _m_delta1Buf.end(), 0.f); - std::fill(_m_delta2Buf.begin(), _m_delta2Buf.end(), 0.f); - - /* While we can slide over the window */ - while (this->_m_window.HasNext()) - { - const float* mfccWindow = this->_m_window.Next(); - auto mfccAudioData = std::vector<float>( - mfccWindow, - mfccWindow + this->_m_windowLen); - - auto mfcc = this->_m_mfcc.MfccCompute(mfccAudioData); - for (size_t i = 0; i < this->_m_mfccBuf.size(0); ++i) - { - this->_m_mfccBuf(i, mfccBufIdx) = mfcc[i]; - } - ++mfccBufIdx; - } - - /* Pad MFCC if needed by repeating last feature vector */ - while (mfccBufIdx != this->_m_mfcc._m_params.m_numMfccVectors) - { - memcpy(&this->_m_mfccBuf(0, mfccBufIdx), - &this->_m_mfccBuf(0, mfccBufIdx-1), sizeof(float)*this->_m_mfcc._m_params.m_numMfccFeatures); - ++mfccBufIdx; - } - - /* Compute first and second order deltas from MFCCs */ - this->_ComputeDeltas(this->_m_mfccBuf, - this->_m_delta1Buf, - this->_m_delta2Buf); - - /* Normalise */ - this->_Normalise(); - - return this->_Quantise<int8_t>(output.data(), quantOffset, quantScale); -} - -bool Preprocess::_ComputeDeltas(Array2d<float>& mfcc, - Array2d<float>& delta1, - Array2d<float>& delta2) -{ - const std::vector <float> delta1Coeffs = - {6.66666667e-02, 5.00000000e-02, 3.33333333e-02, - 1.66666667e-02, -3.46944695e-18, -1.66666667e-02, - -3.33333333e-02, -5.00000000e-02, -6.66666667e-02}; - - const std::vector <float> delta2Coeffs = - {0.06060606, 0.01515152, -0.01731602, - -0.03679654, -0.04329004, -0.03679654, - -0.01731602, 0.01515152, 0.06060606}; - - if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) || - mfcc.size(0) == 0 || mfcc.size(1) == 0) - { - return false; - } - - /* Get the middle index; coeff vec len should always be odd */ - const size_t coeffLen = delta1Coeffs.size(); - const size_t fMidIdx = (coeffLen - 1)/2; - const size_t numFeatures = mfcc.size(0); - const size_t numFeatVectors = mfcc.size(1); - - /* iterate through features in MFCC vector*/ - for (size_t i = 0; i < numFeatures; ++i) - { - /* for each feature, iterate through time (t) samples representing feature evolution and - * calculate d/dt and d^2/dt^2, using 1d convolution with differential kernels. - * Convolution padding = valid, result size is `time length - kernel length + 1`. - * The result is padded with 0 from both sides to match the size of initial time samples data. - * - * For the small filter, conv1d implementation as a simple loop is efficient enough. - * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32. - */ - - for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) - { - float d1 = 0; - float d2 = 0; - const size_t mfccStIdx = j - fMidIdx; - - for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) - { - - d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m]; - d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m]; - } - - delta1(i,j) = d1; - delta2(i,j) = d2; - } - } - - return true; -} - -float Preprocess::_GetMean(Array2d<float>& vec) -{ - return MathUtils::MeanF32(vec.begin(), vec.totalSize()); -} - -float Preprocess::_GetStdDev(Array2d<float>& vec, const float mean) -{ - return MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean); -} - -void Preprocess::_NormaliseVec(Array2d<float>& vec) -{ - auto mean = Preprocess::_GetMean(vec); - auto stddev = Preprocess::_GetStdDev(vec, mean); - - if (stddev == 0) - { - std::fill(vec.begin(), vec.end(), 0); - } - else - { - const float stddevInv = 1.f/stddev; - const float normalisedMean = mean/stddev; - - auto NormalisingFunction = [=](float &value) { - value = value * stddevInv - normalisedMean; - }; - std::for_each(vec.begin(), vec.end(), NormalisingFunction); - } -} - -void Preprocess::_Normalise() -{ - Preprocess::_NormaliseVec(this->_m_mfccBuf); - Preprocess::_NormaliseVec(this->_m_delta1Buf); - Preprocess::_NormaliseVec(this->_m_delta2Buf); -} - -float Preprocess::_GetQuantElem( - const float elem, - const float quantScale, - const int quantOffset, - const float minVal, - const float maxVal) -{ - float val = std::round((elem/quantScale) + quantOffset); - float maxim = std::max<float>(val, minVal); - float returnVal = std::min<float>(std::max<float>(val, minVal), maxVal); - return returnVal; -}
\ No newline at end of file |