1 files changed, 175 insertions, 0 deletions
diff --git a/samples/SpeechRecognition/include/Preprocess.hpp b/samples/SpeechRecognition/include/Preprocess.hpp
new file mode 100644
index 0000000000..80c568439b
--- /dev/null
+++ b/samples/SpeechRecognition/include/Preprocess.hpp
@@ -0,0 +1,175 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DataStructures.hpp"
+#include "SlidingWindow.hpp"
+#include <numeric>
+#include "MFCC.hpp"
+
+/* Class to facilitate pre-processing calculation for Wav2Letter model
+     * for ASR */
+using AudioWindow = SlidingWindow <const float>;
+
+class Preprocess
+{
+public:
+
+    MFCC                _m_mfcc;            /* MFCC instance */
+
+    /* Actual buffers to be populated */
+    Array2d<float>      _m_mfccBuf;         /* Contiguous buffer 1D: MFCC */
+    Array2d<float>      _m_delta1Buf;       /* Contiguous buffer 1D: Delta 1 */
+    Array2d<float>      _m_delta2Buf;       /* Contiguous buffer 1D: Delta 2 */
+
+    uint32_t            _m_windowLen;       /* Window length for MFCC */
+    uint32_t            _m_windowStride;    /* Window stride len for MFCC */
+    AudioWindow         _m_window;          /* Sliding window */
+
+    /**
+     * @brief       Constructor
+     * @param[in]   numMfccFeatures     number of MFCC features per window
+     * @param[in]   windowLen           number of elements in a window
+     * @param[in]   windowStride        stride (in number of elements) for
+     *                                  moving the window
+     * @param[in]   numMfccVectors      number of MFCC vectors per window
+    */
+    Preprocess(
+            const uint32_t  windowLen,
+            const uint32_t  windowStride,
+            const MFCC mfccInst);
+    Preprocess() = delete;
+    ~Preprocess();
+
+    /**
+     * @brief       Calculates the features required from audio data. This
+     *              includes MFCC, first and second order deltas,
+     *              normalisation and finally, quantisation. The tensor is
+     *              populated with feature from a given window placed along
+     *              in a single row.
+     * @param[in]   audioData     pointer to the first element of audio data
+     * @param[in]   audioDataLen  number of elements in the audio data
+     * @param[in]   tensor        tensor to be populated
+     * @return      true if successful, false in case of error.
+     */
+    bool Invoke(const float* audioData,
+                const uint32_t  audioDataLen,
+                std::vector<int8_t>& output,
+                int quantOffset,
+                float quantScale);
+
+
+protected:
+    /**
+     * @brief Computes the first and second order deltas for the
+     *        MFCC buffers - they are assumed to be populated.
+     *
+     * @param[in]  mfcc   MFCC buffers
+     * @param[out] delta1 result of the first diff computation
+     * @param[out] delta2 result of the second diff computation
+     *
+     * @return true if successful, false otherwise
+     */
+    static bool _ComputeDeltas(Array2d<float>& mfcc,
+                               Array2d<float>& delta1,
+                               Array2d<float>& delta2);
+
+    /**
+     * @brief      Given a 2D vector of floats, computes the mean
+     * @param[in]   vec      vector of vector of floats
+     * @return      mean value
+     */
+    static float _GetMean(Array2d<float>& vec);
+
+    /**
+     * @brief       Given a 2D vector of floats, computes the stddev
+     * @param[in]   vec   vector of vector of floats
+     * @param[in]   mean     mean value of the vector passed in
+     * @return      stddev value
+     */
+    static float _GetStdDev(Array2d<float>& vec,
+                            const float mean);
+
+    /**
+     * @brief           Given a 2D vector of floats, normalises it using
+     *                  the mean and the stddev
+     * @param[in/out]   vec      vector of vector of floats
+     * @return
+     */
+    static void _NormaliseVec(Array2d<float>& vec);
+
+    /**
+     * @brief       Normalises the MFCC and delta buffers
+     * @return
+     */
+    void _Normalise();
+
+    /**
+     * @brief       Given the quantisation and data type limits, computes
+     *              the quantised values of a floating point input data.
+     * @param[in]   elem            Element to be quantised
+     * @param[in]   quantScale      Scale
+     * @param[in]   quantOffset     Offset
+     * @param[in]   minVal          Numerical limit - minimum
+     * @param[in]   maxVal          Numerical limit - maximum
+     * @return      floating point quantised value
+     */
+    static float _GetQuantElem(
+            const float     elem,
+            const float     quantScale,
+            const int       quantOffset,
+            const float     minVal,
+            const float     maxVal);
+
+    /**
+     * @brief       Quantises the MFCC and delta buffers, and places them
+     *              in the output buffer. While doing so, it transposes
+     *              the data. Reason: Buffers in this class are arranged
+     *              for "time" axis to be row major. Primary reason for
+     *              this being the convolution speed up (as we can use
+     *              contiguous memory). The output, however, requires the
+     *              time axis to be in column major arrangement.
+     * @param[in]   outputBuf       pointer to the output buffer
+     * @param[in]   outputBufSz     output buffer's size
+     * @param[in]   quantScale      quantisation scale
+     * @param[in]   quantOffset     quantisation offset
+     */
+    template <typename T>
+    bool _Quantise(T* outputBuf, int quantOffset, float quantScale)
+    {
+        /* Populate */
+        T* outputBufMfcc = outputBuf;
+        T* outputBufD1 = outputBuf + this->_m_mfcc._m_params.m_numMfccFeatures;
+        T* outputBufD2 = outputBufD1 + this->_m_mfcc._m_params.m_numMfccFeatures;
+        const uint32_t ptrIncr = this->_m_mfcc._m_params.m_numMfccFeatures * 2; /* (3 vectors - 1 vector) */
+
+        const float minVal = std::numeric_limits<T>::min();
+        const float maxVal = std::numeric_limits<T>::max();
+
+        /* We need to do a transpose while copying and concatenating
+         * the tensor*/
+        for (uint32_t j = 0; j < this->_m_mfcc._m_params.m_numMfccVectors; ++j) {
+            for (uint32_t i = 0; i < this->_m_mfcc._m_params.m_numMfccFeatures; ++i)
+            {
+                *outputBufMfcc++ = static_cast<T>(this->_GetQuantElem(
+                        this->_m_mfccBuf(i, j), quantScale,
+                        quantOffset, minVal, maxVal));
+                *outputBufD1++ = static_cast<T>(this->_GetQuantElem(
+                        this->_m_delta1Buf(i, j), quantScale,
+                        quantOffset, minVal, maxVal));
+                *outputBufD2++ = static_cast<T>(this->_GetQuantElem(
+                        this->_m_delta2Buf(i, j), quantScale,
+                        quantOffset, minVal, maxVal));
+            }
+            outputBufMfcc += ptrIncr;
+            outputBufD1 += ptrIncr;
+            outputBufD2 += ptrIncr;
+        }
+
+        return true;
+    }
+};
+