From 23c26277086c78704a17f0dae86da947816320c0 Mon Sep 17 00:00:00 2001
From: George Gekov <george.gekov@arm.com>
Date: Mon, 16 Aug 2021 11:32:10 +0100
Subject: MLECO-2079 Adding the C++ KWS example

Signed-off-by: Eanna O Cathain <eanna.ocathain@arm.com>
Change-Id: I81899bbfaada32f478c2e2fc6441eabb94d8d0fc
---
 samples/common/include/Audio/AudioCapture.hpp   |  57 ++++++
 samples/common/include/Audio/DataStructures.hpp | 102 +++++++++++
 samples/common/include/Audio/MFCC.hpp           | 234 ++++++++++++++++++++++++
 samples/common/include/Audio/MathUtils.hpp      |  85 +++++++++
 samples/common/include/Audio/SlidingWindow.hpp  | 161 ++++++++++++++++
 5 files changed, 639 insertions(+)
 create mode 100644 samples/common/include/Audio/AudioCapture.hpp
 create mode 100644 samples/common/include/Audio/DataStructures.hpp
 create mode 100644 samples/common/include/Audio/MFCC.hpp
 create mode 100644 samples/common/include/Audio/MathUtils.hpp
 create mode 100644 samples/common/include/Audio/SlidingWindow.hpp

(limited to 'samples/common/include/Audio')
diff --git a/samples/common/include/Audio/AudioCapture.hpp b/samples/common/include/Audio/AudioCapture.hpp
new file mode 100644
index 0000000000..898bf911f4
--- /dev/null
+++ b/samples/common/include/Audio/AudioCapture.hpp
@@ -0,0 +1,57 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <string>
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <exception>
+
+#include "SlidingWindow.hpp"
+
+namespace audio
+{
+
+/**
+ * @brief Class used to capture the audio data loaded from file, and to provide a method of
+ *        extracting correctly positioned and appropriately sized audio windows.
+ *        Only declarations are given here; the member functions are defined outside this header.
+ */
+    class AudioCapture
+    {
+    public:
+        /* Window over read-only float samples; declared public, so it is reachable by callers. */
+        SlidingWindow<const float> m_window;
+
+        /**
+         * @brief Default constructor; m_window is default constructed.
+         */
+        AudioCapture() = default;
+
+        /**
+         * @brief Function to load the audio data captured from the input file to memory.
+         * @param[in] filePath  Input file to load; the expected audio format is not visible here.
+         */
+        static std::vector<float> LoadAudioFile(std::string filePath);
+
+        /**
+         * @brief Function to initialize the sliding window. This will set its position in memory, its
+         *        window size and its stride. NOTE(review): minSamples is presumably the window size.
+         */
+        void InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride);
+
+        /**
+         * @brief Checks whether there is another block of audio in memory to read.
+         */
+        bool HasNext();
+
+        /**
+         * @brief Retrieves the next block of audio if it is available.
+         */
+        std::vector<float> Next();
+    };
+} // namespace audio
\ No newline at end of file
diff --git a/samples/common/include/Audio/DataStructures.hpp b/samples/common/include/Audio/DataStructures.hpp
new file mode 100644
index 0000000000..9922265299
--- /dev/null
+++ b/samples/common/include/Audio/DataStructures.hpp
@@ -0,0 +1,102 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <stdio.h>
+#include <iterator>
+
+/**
+ * Class Array2d is a data structure that represents a two dimensional array.
+ * The data is allocated in contiguous memory, arranged row-wise
+ * and individual elements can be accessed with the () operator.
+ * For example a two dimensional array D of size (M, N) can be accessed:
+ *
+ *               _|<---------------- col size = N  ------------->|
+ *               |  D(r=0, c=0)   D(r=0, c=1)...   D(r=0, c=N-1)
+ *               |  D(r=1, c=0)   D(r=1, c=1)...   D(r=1, c=N-1)
+ *    row size = M  ...
+ *               _  D(r=M-1, c=0) D(r=M-1, c=1)... D(r=M-1, c=N-1)
+ *
+ * The buffer is owned by the instance, so copying is disabled (a member-wise
+ * copy would make two instances release the same buffer).
+ */
+template<typename T>
+class Array2d
+{
+private:
+    size_t m_rows = 0;
+    size_t m_cols = 0;
+    T* m_data = nullptr;
+
+public:
+    /**
+     * Creates the array2d with the given sizes. If either size is 0 a message
+     * is printed and the array stays empty (0 rows, 0 columns, no storage).
+     *
+     * @param rows  number of rows.
+     * @param cols  number of columns.
+     */
+    Array2d(unsigned rows, unsigned cols)
+    {
+        if (rows == 0 || cols == 0) {
+            printf("Array2d constructor has 0 size.\n");
+            return;
+        }
+        m_rows = rows;
+        m_cols = cols;
+        m_data = new T[m_rows * m_cols]; /* size_t product: does not wrap in unsigned arithmetic */
+    }
+
+    /* Owning raw buffer: forbid copies, they would end in a double delete[]. */
+    Array2d(const Array2d&) = delete;
+    Array2d& operator=(const Array2d&) = delete;
+
+    ~Array2d()
+    {
+        delete[] m_data;
+    }
+
+    /** @return reference to the element at (row, col); the indices are not range checked. */
+    T& operator() (unsigned int row, unsigned int col)
+    {
+        return m_data[m_cols * row + col];
+    }
+
+    /** @return copy of the element at (row, col); the indices are not range checked. */
+    T operator() (unsigned int row, unsigned int col) const
+    {
+        return m_data[m_cols * row + col];
+    }
+
+    /**
+     * Gets the extent of the current array2d along one dimension.
+     * @param dim  0 selects the rows, 1 selects the columns.
+     * @return number of rows or columns, 0 for any other dim.
+     */
+    size_t size(size_t dim) const
+    {
+        switch (dim)
+        {
+            case 0:  return m_rows;
+            case 1:  return m_cols;
+            default: return 0;
+        }
+    }
+
+    /** Gets the array2d total size (rows * columns). */
+    size_t totalSize() const
+    {
+        return m_rows * m_cols;
+    }
+
+    /** array2d iterators: plain pointers walking the row-wise storage. */
+    using iterator=T*;
+    using const_iterator=T const*;
+
+    iterator begin() { return m_data; }
+    iterator end() { return m_data + totalSize(); }
+    const_iterator begin() const { return m_data; }
+    const_iterator end() const { return m_data + totalSize(); }
+};
diff --git a/samples/common/include/Audio/MFCC.hpp b/samples/common/include/Audio/MFCC.hpp
new file mode 100644
index 0000000000..468bf92fae
--- /dev/null
+++ b/samples/common/include/Audio/MFCC.hpp
@@ -0,0 +1,234 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+
+#include <vector>
+#include <cstdint>
+#include <cmath>
+#include <limits>
+#include <string>
+
+/* MFCC's consolidated parameters (every member is public) */
+class MfccParams
+{
+public:
+    float       m_samplingFreq;     /* Sampling frequency of the audio; units are not stated here. */
+    int         m_numFbankBins;     /* Number of filter bank bins; read by MFCC::MfccComputeQuant. */
+    float       m_melLoFreq;        /* Low Mel frequency - presumably the filter bank lower bound. */
+    float       m_melHiFreq;        /* High Mel frequency - presumably the filter bank upper bound. */
+    int         m_numMfccFeatures;  /* Number of features per frame; sizes MfccComputeQuant's output. */
+    int         m_frameLen;         /* Frame length - presumably in samples, confirm in definition. */
+    int         m_frameLenPadded;   /* Has no constructor argument - presumably derived from frameLen. */
+    bool        m_useHtkMethod;     /* Whether the HTK method is used - presumably for Mel scaling. */
+    int         m_numMfccVectors;   /* Number of MFCC vectors; its use is not visible in this header. */
+    /** @brief  Constructor (defined elsewhere); there is no argument for m_frameLenPadded. */
+    MfccParams(const float samplingFreq, const int numFbankBins,
+               const float melLoFreq, const float melHiFreq,
+               const int numMfccFeats, const int frameLen,
+               const bool useHtkMethod, const int numMfccVectors);
+    /* Delete the default constructor */
+    MfccParams()  = delete;
+    /* Default destructor */
+    ~MfccParams() = default;
+    /** @brief  String representation of parameters */
+    std::string Str();
+};
+
+/**
+ * @brief   Class for MFCC feature extraction.
+ *          Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
+ *          This class is designed to be generic and self-sufficient but
+ *          certain calculation routines can be overridden to accommodate
+ *          use-case specific requirements.
+ */
+class MFCC {
+public:
+    /**
+     * @brief       Constructor
+     * @param[in]   params   MFCC parameters
+    */
+    explicit MFCC(const MfccParams& params);
+
+    MFCC() = delete;
+    /* Virtual: routines can be overridden, so instances may be destroyed via an MFCC pointer. */
+    virtual ~MFCC() = default;
+
+    /**
+    * @brief        Extract MFCC  features for one single small frame of
+    *               audio data e.g. 640 samples.
+    * @param[in]    audioData   Vector of audio samples to calculate
+    *                           features for.
+    * @return       Vector of extracted MFCC features.
+    **/
+    std::vector<float> MfccCompute(const std::vector<float>& audioData);
+
+    /** @brief  Initialise. */
+    void Init();
+
+   /**
+    * @brief        Extract MFCC features and quantise for one single small
+    *               frame of audio data e.g. 640 samples.
+    * @tparam       T             Arithmetic type of the quantised features.
+    * @param[in]    audioData     Vector of audio samples to calculate features for.
+    * @param[in]    quantScale    Quantisation scale.
+    * @param[in]    quantOffset   Quantisation offset.
+    * @return       Vector of extracted quantised MFCC features, saturated to the range of T.
+    **/
+    template<typename T>
+    std::vector<T> MfccComputeQuant(const std::vector<float>& audioData,
+                                    const float quantScale,
+                                    const int quantOffset)
+    {
+        this->MfccComputePreFeature(audioData);
+        /* lowest() rather than min(): for a floating point T min() is the smallest positive value. */
+        const float minVal = static_cast<float>(std::numeric_limits<T>::lowest());
+        const float maxVal = static_cast<float>(std::numeric_limits<T>::max());
+        std::vector<T> mfccOut(this->m_params.m_numMfccFeatures);
+        const size_t numFbankBins = this->m_params.m_numFbankBins;
+
+        /* Take DCT. Uses matrix mul; j is the offset of row i in the flattened DCT matrix. */
+        for (size_t i = 0, j = 0; i < mfccOut.size(); ++i, j += numFbankBins)
+        {
+            float sum = 0;
+            for (size_t k = 0; k < numFbankBins; ++k)
+            {
+                sum += this->m_dctMatrix[j + k] * this->m_melEnergies[k];
+            }
+            /* Quantize to T; explicit comparisons, as <algorithm> is not included by this header. */
+            sum = std::round((sum / quantScale) + quantOffset);
+            mfccOut[i] = static_cast<T>(sum < minVal ? minVal : (sum > maxVal ? maxVal : sum));
+        }
+
+        return mfccOut;
+    }
+
+    MfccParams m_params;
+
+    /* Constants */
+    static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
+    static constexpr float ms_freqStep = 200.0 / 3;
+    static constexpr float ms_minLogHz = 1000.0;
+    static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
+
+protected:
+    /**
+     * @brief       Project input frequency to Mel Scale.
+     * @param[in]   freq           Input frequency in floating point.
+     * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+     *                             used for calculation.
+     * @return      Mel transformed frequency in floating point.
+     **/
+    static float MelScale(float freq,
+                          bool  useHTKMethod = true);
+
+    /**
+     * @brief       Inverse Mel transform - convert MEL warped frequency
+     *              back to normal frequency.
+     * @param[in]   melFreq        Mel frequency in floating point.
+     * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+     *                             used for calculation.
+     * @return      Real world frequency in floating point.
+     **/
+    static float InverseMelScale(float melFreq,
+                                 bool  useHTKMethod = true);
+
+    /**
+     * @brief       Populates MEL energies after applying the MEL filter
+     *              bank weights and adding them up to be placed into
+     *              bins, according to the filter bank's first and last
+     *              indices (pre-computed for each filter bank element
+     *              by CreateMelFilterBank function).
+     * @param[in]   fftVec                  Vector populated with FFT magnitudes.
+     * @param[in]   melFilterBank           2D Vector with filter bank weights.
+     * @param[in]   filterBankFilterFirst   Vector containing the first indices of filter bank
+     *                                      to be used for each bin.
+     * @param[in]   filterBankFilterLast    Vector containing the last indices of filter bank
+     *                                      to be used for each bin.
+     * @param[out]  melEnergies             Pre-allocated vector of MEL energies to be
+     *                                      populated.
+     * @return      true if successful, false otherwise.
+     */
+    virtual bool ApplyMelFilterBank(
+        std::vector<float>&                 fftVec,
+        std::vector<std::vector<float>>&    melFilterBank,
+        std::vector<uint32_t>&              filterBankFilterFirst,
+        std::vector<uint32_t>&              filterBankFilterLast,
+        std::vector<float>&                 melEnergies);
+
+    /**
+     * @brief           Converts the Mel energies for logarithmic scale.
+     * @param[in,out]   melEnergies   1D vector of Mel energies.
+     **/
+    virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
+
+    /**
+     * @brief       Create a matrix used to calculate Discrete Cosine
+     *              Transform.
+     * @param[in]   inputLength        Input length of the buffer on which
+     *                                 DCT will be performed.
+     * @param[in]   coefficientCount   Total coefficients per input length.
+     * @return      1D vector with inputLength x coefficientCount elements
+     *              populated with DCT coefficients.
+     */
+    virtual std::vector<float> CreateDCTMatrix(
+                                int32_t inputLength,
+                                int32_t coefficientCount);
+
+    /**
+     * @brief       Given the low and high Mel values, get the normaliser
+     *              for weights to be applied when populating the filter
+     *              bank.
+     * @param[in]   leftMel        Low Mel frequency value.
+     * @param[in]   rightMel       High Mel frequency value.
+     * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+     *                             used for calculation.
+     * @return      Value to use for normalizing.
+     */
+    virtual float GetMelFilterBankNormaliser(
+                    const float&   leftMel,
+                    const float&   rightMel,
+                    bool     useHTKMethod);
+
+private:
+
+    std::vector<float>              m_frame;
+    std::vector<float>              m_buffer;
+    std::vector<float>              m_melEnergies;
+    std::vector<float>              m_windowFunc;
+    std::vector<std::vector<float>> m_melFilterBank;
+    std::vector<float>              m_dctMatrix;
+    std::vector<uint32_t>           m_filterBankFilterFirst;
+    std::vector<uint32_t>           m_filterBankFilterLast;
+    bool                            m_filterBankInitialised = false;
+
+    /**
+     * @brief       Initialises the filter banks and the DCT matrix. **/
+    void InitMelFilterBank();
+
+    /**
+     * @brief       Signals whether the instance of MFCC has had its
+     *              required buffers initialised.
+     * @return      true if initialised, false otherwise.
+     **/
+    bool IsMelFilterBankInited() const;
+
+    /**
+     * @brief       Create mel filter banks for MFCC calculation.
+     * @return      2D vector of floats.
+     **/
+    std::vector<std::vector<float>> CreateMelFilterBank();
+
+    /**
+     * @brief       Computes and populates internal member buffers used
+     *              in MFCC feature calculation
+     * @param[in]   audioData   1D vector of floating point audio samples.
+     */
+    void MfccComputePreFeature(const std::vector<float>& audioData);
+
+    /** @brief       Computes the magnitude from an interleaved complex array. */
+    void ConvertToPowerSpectrum();
+
+};
diff --git a/samples/common/include/Audio/MathUtils.hpp b/samples/common/include/Audio/MathUtils.hpp
new file mode 100644
index 0000000000..1d8b0d31cc
--- /dev/null
+++ b/samples/common/include/Audio/MathUtils.hpp
@@ -0,0 +1,85 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+#include <vector>
+#include <cmath>
+#include <cstdint>
+#include <numeric>
+
+class MathUtils
+{
+    /* Stateless holder: every member declared below is a static function. */
+public:
+
+    /**
+     * @brief       Computes the FFT for the input vector
+     * @param[in]   input       Floating point vector of input elements
+     * @param[out]  fftOutput   Output buffer to be populated by computed
+     *                          FFTs
+     * @return      none
+     */
+    static void FftF32(std::vector<float>& input,
+                       std::vector<float>& fftOutput);
+
+
+    /**
+     * @brief       Computes the dot product of two 1D floating point
+     *              vectors.
+     *              result = sum(srcA[0]*srcB[0] + srcA[1]*srcB[1] + ..)
+     * @param[in]   srcPtrA     pointer to the first element of first array
+     * @param[in]   srcPtrB     pointer to the first element of second array.
+     *                          NOTE(review): declared non-const although it is
+     *                          documented as an input - confirm it is not modified.
+     * @param[in]   srcLen      Number of elements in the array/vector
+     * @return      dot product
+     */
+    static float DotProductF32(const float* srcPtrA, float* srcPtrB,
+                               int srcLen);
+
+    /**
+     * @brief       Computes the squared magnitude of floating point
+     *              complex number array.
+     * @param[in]   ptrSrc      pointer to the first element of input
+     *                          array
+     * @param[in]   srcLen      Number of elements in the array/vector
+     * @param[out]  ptrDst      Output buffer to be populated
+     * @param[in]   dstLen      output buffer len (for sanity check only)
+     * @return      true if successful, false otherwise
+     */
+    static bool ComplexMagnitudeSquaredF32(const float* ptrSrc,
+                                           int srcLen,
+                                           float* ptrDst,
+                                           int dstLen);
+
+    /**
+     * @brief       Computes the natural logarithms of input floating point
+     *              vector
+     * @param[in]   input   Floating point input vector
+     * @param[out]  output  Pre-allocated buffer to be populated with
+     *                      natural log values of each input element
+     * @return      none
+     */
+    static void VecLogarithmF32(std::vector <float>& input,
+                                std::vector <float>& output);
+
+    /**
+     * @brief       Gets the mean of a floating point array of elements
+     * @param[in]   ptrSrc  pointer to the first element
+     * @param[in]   srcLen  Number of elements in the array/vector
+     * @return      average value
+     */
+    static float MeanF32(const float* ptrSrc, uint32_t srcLen);
+
+    /**
+     * @brief       Gets the standard deviation of a floating point array
+     *              of elements
+     * @param[in]   ptrSrc  pointer to the first element
+     * @param[in]   srcLen  Number of elements in the array/vector
+     * @param[in]   mean    pre-computed mean value
+     * @return      standard deviation value
+     */
+    static float StdDevF32(const float* ptrSrc, uint32_t srcLen,
+                           float mean);
+};
diff --git a/samples/common/include/Audio/SlidingWindow.hpp b/samples/common/include/Audio/SlidingWindow.hpp
new file mode 100644
index 0000000000..77498c6338
--- /dev/null
+++ b/samples/common/include/Audio/SlidingWindow.hpp
@@ -0,0 +1,161 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+/**
+ * Non-owning view that slides a window of m_size elements, in steps of m_stride
+ * elements, over a caller provided buffer of m_dataSize elements of type T.
+ */
+template<class T>
+class SlidingWindow
+{
+protected:
+    T* m_start = nullptr;
+    size_t m_dataSize = 0;
+    size_t m_size = 0;
+    size_t m_stride = 0;
+    size_t m_count = 0;
+public:
+
+    /**
+     * Creates the window slider through the given data.
+     *
+     * @param data          pointer to the data to slide through.
+     * @param dataSize      size in T type elements wise.
+     * @param windowSize    sliding window size in T type wise elements.
+     * @param stride        stride size in T type wise elements.
+     */
+    SlidingWindow(T* data, size_t dataSize,
+                  size_t windowSize, size_t stride)
+        : m_start(data),
+          m_dataSize(dataSize),
+          m_size(windowSize),
+          m_stride(stride)
+    {
+    }
+
+    SlidingWindow() = default;
+    /* Virtual because the class has virtual members and may be deleted via a base pointer. */
+    virtual ~SlidingWindow() = default;
+
+    /**
+     * Get the next data window.
+     * @return pointer to the next window, if next window is not available nullptr is returned.
+     *         The last window returned may reach past the end of the data, see RemainingData().
+     */
+    virtual T* Next()
+    {
+        if (!HasNext())
+        {
+            return nullptr;
+        }
+        m_count++;
+        return m_start + Index() * m_stride;
+    }
+
+    /**
+     * Checks if the next data portion is available.
+     * @return true if next data portion is available
+     */
+    bool HasNext()
+    {
+        return this->m_count < 1 + this->FractionalTotalStrides() && (this->NextWindowStartIndex() < this->m_dataSize);
+    }
+
+    /**
+     * Resets the slider to the initial position.
+     */
+    virtual void Reset()
+    {
+        m_count = 0;
+    }
+
+    /**
+     * Gets the size of the sliding window in T type elements.
+     */
+    virtual size_t GetWindowSize()
+    {
+        return m_size;
+    }
+
+    /**
+     * Resets the slider to the start of the new data.
+     * New data size MUST be the same as the old one.
+     * @param newStart pointer to the new data to slide through.
+     */
+    virtual void Reset(T* newStart)
+    {
+        m_start = newStart;
+        Reset();
+    }
+
+    /**
+     * Gets current index of the sliding window.
+     * @return current position of the sliding window in number of strides
+     */
+    size_t Index()
+    {
+        return m_count == 0 ? 0 : m_count - 1;
+    }
+
+    /**
+     * Gets the index from the start of the data where the next window will begin.
+     * While Index() returns the index of sliding window itself this function returns the index of the data
+     * element itself.
+     * @return Index from the start of the data where the next sliding window will begin.
+     */
+    virtual size_t NextWindowStartIndex()
+    {
+        return m_count * m_stride;
+    }
+
+    /**
+     * Go to given sliding window index.
+     * @param index new position of the sliding window. if index is invalid (greater than possible range of strides)
+     *              then next call to Next() will return nullptr.
+     */
+    void FastForward(size_t index)
+    {
+        m_count = index;
+    }
+
+    /**
+     * Calculates whole number of times the window can stride through the given data.
+     * @return maximum number of strides; 0 if the window is larger than the data or
+     *         if the stride is 0 (guards the division below).
+     */
+    size_t TotalStrides()
+    {
+        if (m_size > m_dataSize || m_stride == 0)
+        {
+            return 0;
+        }
+        return ((m_dataSize - m_size)/m_stride);
+    }
+
+    /**
+     * Calculates number of times the window can stride through the given data. May not be a whole number.
+     * @return Number of strides to cover all data; 0 if the window is larger than the data or
+     *         if the stride is 0, so that HasNext() offers at most the first window in those cases.
+     */
+    float FractionalTotalStrides()
+    {
+        if (this->m_size > this->m_dataSize || this->m_stride == 0)
+        {
+            return 0.0f;
+        }
+        return (this->m_dataSize - this->m_size) / static_cast<float>(this->m_stride);
+    }
+
+    /**
+     * Calculates the remaining data left to be processed
+     * @return The remaining unprocessed data, counted from the start index of the next window
+     */
+    int RemainingData()
+    {
+        return static_cast<int>(this->m_dataSize - this->NextWindowStartIndex());
+    }
+};
\ No newline at end of file
-- 
cgit v1.2.1