aboutsummaryrefslogtreecommitdiff
path: root/samples/SpeechRecognition/src/Wav2LetterPreprocessor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'samples/SpeechRecognition/src/Wav2LetterPreprocessor.cpp')
-rw-r--r--samples/SpeechRecognition/src/Wav2LetterPreprocessor.cpp187
1 files changed, 187 insertions, 0 deletions
diff --git a/samples/SpeechRecognition/src/Wav2LetterPreprocessor.cpp b/samples/SpeechRecognition/src/Wav2LetterPreprocessor.cpp
new file mode 100644
index 0000000000..9329d5e4d5
--- /dev/null
+++ b/samples/SpeechRecognition/src/Wav2LetterPreprocessor.cpp
@@ -0,0 +1,187 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "MathUtils.hpp"
+#include <cstring>
+#include <cmath>
+#include <numeric>
+#include <algorithm>
+#include <memory>
+#include "Wav2LetterPreprocessor.hpp"
+#include "Wav2LetterMFCC.hpp"
+
+/**
+ * Mean of a 2D float buffer.
+ *
+ * Forwards the buffer's begin() pointer and its totalSize() element count
+ * to MathUtils::MeanF32 and returns that result unchanged.
+ *
+ * @param vec  Buffer whose elements are averaged.
+ * @return     Value produced by MathUtils::MeanF32.
+ */
+float Wav2LetterPreprocessor::GetMean(Array2d<float>& vec)
+{
+    const auto elementCount = vec.totalSize();
+    return MathUtils::MeanF32(vec.begin(), elementCount);
+}
+
+/**
+ * Standard deviation of a 2D float buffer.
+ *
+ * Forwards the buffer's begin() pointer, its totalSize() element count and
+ * the supplied mean to MathUtils::StdDevF32 and returns that result unchanged.
+ *
+ * @param vec   Buffer to measure.
+ * @param mean  Mean handed to MathUtils::StdDevF32 as-is (presumably the mean
+ *              of the same buffer - see NormaliseVec for the only caller here).
+ * @return      Value produced by MathUtils::StdDevF32.
+ */
+float Wav2LetterPreprocessor::GetStdDev(Array2d<float>& vec, const float mean)
+{
+    const auto elementCount = vec.totalSize();
+    return MathUtils::StdDevF32(vec.begin(), elementCount, mean);
+}
+
+/**
+ * Normalises a buffer in place using its own mean and standard deviation.
+ *
+ * Each element x becomes x * (1 / stddev) - (mean / stddev). When the
+ * standard deviation is exactly zero the whole buffer is set to zero instead,
+ * which avoids dividing by zero.
+ *
+ * @param vec  Buffer to normalise; modified in place.
+ */
+void Wav2LetterPreprocessor::NormaliseVec(Array2d<float>& vec)
+{
+    const float mean = GetMean(vec);
+    const float stddev = GetStdDev(vec, mean);
+
+    // Degenerate case: no spread in the data, so the result is all zeros.
+    if (stddev == 0)
+    {
+        std::fill(vec.begin(), vec.end(), 0.f);
+        return;
+    }
+
+    // Pre-compute the scale and shift once; the per-element work is then a
+    // single multiply and subtract.
+    const float scale = 1.f / stddev;
+    const float shift = mean / stddev;
+
+    for (float& value : vec)
+    {
+        value = value * scale - shift;
+    }
+}
+
+/**
+ * Normalises the MFCC buffer and both delta buffers, each independently
+ * with its own mean and standard deviation (see NormaliseVec).
+ */
+void Wav2LetterPreprocessor::Normalise()
+{
+    // Same order as before: MFCC, first-order delta, second-order delta.
+    for (auto* buffer : {&m_mfccBuf, &m_delta1Buf, &m_delta2Buf})
+    {
+        NormaliseVec(*buffer);
+    }
+}
+
+/**
+ * Quantises a single value: divides by the scale, adds the offset, rounds
+ * with std::round and limits the result to [minVal, maxVal].
+ *
+ * @param elem         Value to quantise.
+ * @param quantScale   Quantisation scale (divisor).
+ * @param quantOffset  Quantisation offset added after scaling.
+ * @param minVal       Lower bound applied to the rounded value.
+ * @param maxVal       Upper bound applied after the lower bound.
+ * @return             The rounded, bounded value as a float.
+ */
+float Wav2LetterPreprocessor::GetQuantElem(
+    const float elem,
+    const float quantScale,
+    const int quantOffset,
+    const float minVal,
+    const float maxVal)
+{
+    const float scaled = (elem / quantScale) + quantOffset;
+    const float rounded = std::round(scaled);
+
+    // Apply the lower bound first, then the upper bound.
+    const float lowerBounded = std::max<float>(rounded, minVal);
+    return std::min<float>(lowerBounded, maxVal);
+}
+
+/**
+ * Runs the full pre-processing pipeline on a block of audio:
+ * MFCC extraction over a sliding window, padding, delta computation,
+ * normalisation and quantisation.
+ *
+ * @param audioData     Pointer to the audio samples; must not be null.
+ * @param audioDataLen  Number of samples available at audioData.
+ * @param output        Destination for the quantised features. Its data()
+ *                      pointer is handed to Quantise<int8_t>; this function
+ *                      does not resize it.
+ *                      NOTE(review): presumably the caller must pre-size it -
+ *                      confirm against Quantise.
+ * @param quantOffset   Quantisation offset forwarded to Quantise.
+ * @param quantScale    Quantisation scale forwarded to Quantise.
+ * @return              false if audioData is null, if no complete window could
+ *                      be extracted although feature vectors are expected, or
+ *                      if ComputeDeltas fails; otherwise the result of Quantise.
+ */
+bool Wav2LetterPreprocessor::Invoke(const float* audioData, const uint32_t audioDataLen, std::vector<int8_t>& output,
+                                    int quantOffset, float quantScale)
+{
+    if (audioData == nullptr)
+    {
+        return false;
+    }
+
+    this->m_window = SlidingWindow<const float>(
+        audioData, audioDataLen,
+        this->m_windowLen, this->m_windowStride);
+
+    const auto numMfccVectors = static_cast<uint32_t>(this->m_mfcc->m_params.m_numMfccVectors);
+    const size_t numMfccFeatures = this->m_mfccBuf.size(0);
+
+    uint32_t mfccBufIdx = 0;
+
+    // Init buffers with 0
+    std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
+    std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
+    std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
+
+    // Scratch buffer reused for every window to avoid one allocation per step.
+    std::vector<float> mfccAudioData;
+
+    // Slide over the audio, but never produce more feature vectors than the
+    // buffer was sized for - extra windows would be written out of bounds.
+    while (this->m_window.HasNext() && mfccBufIdx < numMfccVectors)
+    {
+        const float* mfccWindow = this->m_window.Next();
+        mfccAudioData.assign(mfccWindow, mfccWindow + this->m_windowLen);
+
+        auto mfcc = this->m_mfcc->MfccCompute(mfccAudioData);
+        for (size_t i = 0; i < numMfccFeatures; ++i)
+        {
+            this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
+        }
+        ++mfccBufIdx;
+    }
+
+    // Pad MFCC if needed by repeating last feature vector
+    if (mfccBufIdx < numMfccVectors)
+    {
+        if (mfccBufIdx == 0)
+        {
+            // Not even one window was available, so there is nothing to repeat.
+            return false;
+        }
+
+        // Copy element by element through the (feature, vector) accessor,
+        // exactly as the extraction loop above writes them. This does not
+        // depend on how Array2d lays a feature vector out in memory, unlike
+        // a raw memcpy between neighbouring vectors.
+        for (; mfccBufIdx < numMfccVectors; ++mfccBufIdx)
+        {
+            for (size_t i = 0; i < numMfccFeatures; ++i)
+            {
+                this->m_mfccBuf(i, mfccBufIdx) = this->m_mfccBuf(i, mfccBufIdx - 1);
+            }
+        }
+    }
+
+    // Compute first and second order deltas from MFCCs
+    if (!Wav2LetterPreprocessor::ComputeDeltas(this->m_mfccBuf,
+                                               this->m_delta1Buf,
+                                               this->m_delta2Buf))
+    {
+        return false;
+    }
+
+    // Normalise
+    this->Normalise();
+
+    return this->Quantise<int8_t>(output.data(), quantOffset, quantScale);
+}
+
+/**
+ * Computes first and second order deltas of the MFCC features along the
+ * feature-vector (time) axis using a 9-tap 1D convolution per feature.
+ *
+ * Convolution padding is "valid": only positions where the whole kernel fits
+ * are written, i.e. indices [4, numFeatVectors - 4). Elements outside that
+ * range are left untouched, so the caller is expected to have zeroed the
+ * delta buffers beforehand (Invoke does).
+ *
+ * @param mfcc    Input features, indexed as (feature, vector). Only read.
+ * @param delta1  Output for the first order delta, same indexing.
+ * @param delta2  Output for the second order delta, same indexing.
+ * @return        false if any buffer has a zero first dimension, if mfcc has
+ *                no feature vectors, if the delta buffers disagree in their
+ *                first dimension, or if a delta buffer is smaller than mfcc;
+ *                true otherwise (including when there are too few feature
+ *                vectors for the kernel to fit, in which case nothing is
+ *                written).
+ */
+bool Wav2LetterPreprocessor::ComputeDeltas(Array2d<float>& mfcc,
+                                           Array2d<float>& delta1,
+                                           Array2d<float>& delta2)
+{
+    // Kernel length; must be odd so that it has a well defined middle tap.
+    constexpr size_t coeffLen = 9;
+    static_assert(coeffLen % 2 == 1, "delta kernels must have an odd length");
+
+    // Static tables: built once instead of heap-allocating on every call.
+    // The middle tap of the first kernel is effectively zero.
+    static constexpr float delta1Coeffs[coeffLen] =
+        {6.66666667e-02,  5.00000000e-02,  3.33333333e-02,
+         1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
+        -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
+
+    static constexpr float delta2Coeffs[coeffLen] =
+        { 0.06060606,  0.01515152, -0.01731602,
+         -0.03679654, -0.04329004, -0.03679654,
+         -0.01731602,  0.01515152,  0.06060606};
+
+    if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
+        mfcc.size(0) == 0 || mfcc.size(1) == 0)
+    {
+        return false;
+    }
+
+    const size_t fMidIdx = (coeffLen - 1)/2;
+    const size_t numFeatures = mfcc.size(0);
+    const size_t numFeatVectors = mfcc.size(1);
+
+    // The loops below write delta(i, j) for every i < numFeatures and
+    // j < numFeatVectors - fMidIdx; reject outputs that cannot hold that.
+    if (delta1.size(0) < numFeatures ||
+        delta1.size(1) < numFeatVectors || delta2.size(1) < numFeatVectors)
+    {
+        return false;
+    }
+
+    // With fewer vectors than kernel taps the valid convolution is empty.
+    // Bail out here: `numFeatVectors - fMidIdx` below is unsigned and would
+    // wrap around for very short inputs.
+    if (numFeatVectors < coeffLen)
+    {
+        return true;
+    }
+
+    const size_t endIdx = numFeatVectors - fMidIdx;
+
+    // iterate through features in MFCC vector
+    for (size_t i = 0; i < numFeatures; ++i)
+    {
+        /* for each feature, iterate through time (t) samples representing feature evolution and
+         * calculate d/dt and d^2/dt^2, using 1d convolution with differential kernels.
+         * Convolution padding = valid, result size is `time length - kernel length + 1`.
+         * The result is padded with 0 from both sides to match the size of initial time samples data.
+         *
+         * For the small filter, conv1d implementation as a simple loop is efficient enough.
+         * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
+         */
+        for (size_t j = fMidIdx; j < endIdx; ++j)
+        {
+            float d1 = 0;
+            float d2 = 0;
+            const size_t mfccStIdx = j - fMidIdx;
+
+            // Convolution: the kernel is applied reversed relative to the data.
+            for (size_t k = 0; k < coeffLen; ++k)
+            {
+                const size_t m = coeffLen - 1 - k;
+                const float sample = mfcc(i, mfccStIdx + k);
+
+                d1 += sample * delta1Coeffs[m];
+                d2 += sample * delta2Coeffs[m];
+            }
+
+            delta1(i,j) = d1;
+            delta2(i,j) = d2;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Constructs the preprocessor, taking ownership of the MFCC instance and
+ * sizing the MFCC and delta buffers as (m_numMfccFeatures x m_numMfccVectors)
+ * from the MFCC parameters.
+ *
+ * @param windowLen     Number of samples in one sliding window.
+ * @param windowStride  Stride, in samples, between consecutive windows.
+ * @param mfccInst      MFCC calculator; ownership is moved into m_mfcc.
+ *                      It is dereferenced immediately, so it must not be null.
+ *
+ * NOTE(review): the buffer initialisers read m_mfcc, which is only safe if
+ * m_mfcc is declared before the buffers in the class - the declaration is
+ * not visible here, confirm against the header.
+ */
+Wav2LetterPreprocessor::Wav2LetterPreprocessor(const uint32_t windowLen,
+                                               const uint32_t windowStride,
+                                               std::unique_ptr<Wav2LetterMFCC> mfccInst):
+    m_mfcc(std::move(mfccInst)),
+    m_mfccBuf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
+    m_delta1Buf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
+    m_delta2Buf(m_mfcc->m_params.m_numMfccFeatures, m_mfcc->m_params.m_numMfccVectors),
+    m_windowLen(windowLen),
+    m_windowStride(windowStride)
+{
+    // Only initialise the MFCC calculator for a usable configuration.
+    if (m_mfcc->m_params.m_numMfccFeatures > 0 && windowLen > 0)
+    {
+        this->m_mfcc->Init();
+    }
+
+    // Start all three buffers from a known state, matching what Invoke does
+    // before each run, rather than zeroing the MFCC buffer alone.
+    std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
+    std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
+    std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
+}
\ No newline at end of file