9 files changed, 161 insertions, 757 deletions
diff --git a/samples/SpeechRecognition/include/AudioCapture.hpp b/samples/SpeechRecognition/include/AudioCapture.hpp
deleted file mode 100644
index 90c2eccacf..0000000000
--- a/samples/SpeechRecognition/include/AudioCapture.hpp
+++ /dev/null
@@ -1,62 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <string>
-#include <iostream>
-
-#include <math.h>
-
-#include <vector>
-
-#include <exception>
-
-#include "SlidingWindow.hpp"
-
-namespace asr
-{
-
-/**
-* @brief Class used to capture the audio data loaded from file, and to provide a method of
- * extracting correctly positioned and appropriately sized audio windows
-*
-*/
-    class AudioCapture
-    {
-    public:
-
-        SlidingWindow<const float> m_window;
-        int lastReadIdx= 0;
-
-        /**
-        * @brief Default constructor
-        */
-        AudioCapture()
-        {};
-
-        /**
-        * @brief Function to load the audio data captured from the
-         * input file to memory.
-        */
-        std::vector<float> LoadAudioFile(std::string filePath);
-
-        /**
-        * @brief Function to initialize the sliding window. This will set its position in memory, its
-         * window size and its stride.
-        */
-        void InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride);
-
-        /**
-        * Checks whether there is another block of audio in memory to read
-        */
-        bool HasNext();
-
-        /**
-        * Retrieves the next block of audio if its available
-        */
-        std::vector<float> Next();
-    };
-} // namespace asr
-\ No newline at end of file
diff --git a/samples/SpeechRecognition/include/DataStructures.hpp b/samples/SpeechRecognition/include/DataStructures.hpp
deleted file mode 100644
index 9922265299..0000000000
--- a/samples/SpeechRecognition/include/DataStructures.hpp
+++ /dev/null
@@ -1,102 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include <stdio.h>
-#include <iterator>
-
-/**
- * Class Array2d is a data structure that represents a two dimensional array.
- * The data is allocated in contiguous memory, arranged row-wise
- * and individual elements can be accessed with the () operator.
- * For example a two dimensional array D of size (M, N) can be accessed:
- *
- *               _|<------------- col size = N  -------->|
- *               |  D(r=0, c=0) D(r=0, c=1)... D(r=0, c=N)
- *               |  D(r=1, c=0) D(r=1, c=1)... D(r=1, c=N)
- *               |  ...
- *    row size = M  ...
- *               |  ...
- *               _  D(r=M, c=0) D(r=M, c=1)... D(r=M, c=N)
- *
- */
-template<typename T>
-class Array2d
-{
-private:
-    size_t m_rows;
-    size_t m_cols;
-    T* m_data;
-
-public:
-    /**
-     * Creates the array2d with the given sizes.
-     *
-     * @param rows  number of rows.
-     * @param cols  number of columns.
-     */
-    Array2d(unsigned rows, unsigned cols)
-    {
-        if (rows == 0 || cols == 0) {
-            printf("Array2d constructor has 0 size.\n");
-            m_data = nullptr;
-            return;
-        }
-        m_rows = rows;
-        m_cols = cols;
-        m_data = new T[rows * cols];
-    }
-
-    ~Array2d()
-    {
-        delete[] m_data;
-    }
-
-    T& operator() (unsigned int row, unsigned int col)
-    {
-        return m_data[m_cols * row + col];
-    }
-
-    T operator() (unsigned int row, unsigned int col) const
-    {
-        return m_data[m_cols * row + col];
-    }
-
-    /**
-     * Gets rows number of the current array2d.
-     * @return number of rows.
-     */
-    size_t size(size_t dim)
-    {
-        switch (dim)
-        {
-            case 0:
-                return m_rows;
-            case 1:
-                return m_cols;
-            default:
-                return 0;
-        }
-    }
-
-    /**
-     * Gets the array2d total size.
-     */
-    size_t totalSize()
-    {
-        return m_rows * m_cols;
-    }
-
-    /**
-     * array2d iterator.
-     */
-    using iterator=T*;
-    using const_iterator=T const*;
-
-    iterator begin() { return m_data; }
-    iterator end() { return m_data + totalSize(); }
-    const_iterator begin() const { return m_data; }
-    const_iterator end() const { return m_data + totalSize(); };
-};
diff --git a/samples/SpeechRecognition/include/Decoder.hpp b/samples/SpeechRecognition/include/Decoder.hpp
index 69d97ccf64..9dd484a5d1 100644
--- a/samples/SpeechRecognition/include/Decoder.hpp
+++ b/samples/SpeechRecognition/include/Decoder.hpp
@@ -46,8 +46,8 @@ namespace asr
                     rowVector.emplace_back(static_cast<int16_t>(contextToProcess[row * rowLength + j]));
                 }
 
-                int max_index = std::distance(rowVector.begin(),std::max_element(rowVector.begin(), rowVector.end()));
-                unfilteredText.emplace_back(this->m_labels.at(max_index)[0]);
+                int maxIndex = std::distance(rowVector.begin(), std::max_element(rowVector.begin(), rowVector.end()));
+                unfilteredText.emplace_back(this->m_labels.at(maxIndex)[0]);
             }
 
             std::string filteredText = FilterCharacters(unfilteredText);
diff --git a/samples/SpeechRecognition/include/MFCC.hpp b/samples/SpeechRecognition/include/MFCC.hpp
deleted file mode 100644
index 14b6d9fe79..0000000000
--- a/samples/SpeechRecognition/include/MFCC.hpp
+++ /dev/null
@@ -1,244 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <vector>
-#include <cstdint>
-#include <cmath>
-#include <limits>
-#include <string>
-
-/* MFCC's consolidated parameters */
-class MfccParams
-{
-public:
-    float       m_samplingFreq;
-    int         m_numFbankBins;
-    float       m_melLoFreq;
-    float       m_melHiFreq;
-    int         m_numMfccFeatures;
-    int         m_frameLen;
-    int         m_frameLenPadded;
-    bool        m_useHtkMethod;
-    int         m_numMfccVectors;
-
-    /** @brief  Constructor */
-    MfccParams(const float samplingFreq, const int numFbankBins,
-               const float melLoFreq, const float melHiFreq,
-               const int numMfccFeats, const int frameLen,
-               const bool useHtkMethod, const int numMfccVectors);
-
-    /* Delete the default constructor */
-    MfccParams()  = delete;
-
-    /* Default destructor */
-    ~MfccParams() = default;
-
-    /** @brief  String representation of parameters */
-    std::string Str();
-};
-
-/**
- * @brief   Class for MFCC feature extraction.
- *          Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
- *          This class is designed to be generic and self-sufficient but
- *          certain calculation routines can be overridden to accommodate
- *          use-case specific requirements.
- */
-class MFCC
-{
-
-public:
-
-    /**
-    * @brief        Extract MFCC  features for one single small frame of
-    *               audio data e.g. 640 samples.
-    * @param[in]    audioData - Vector of audio samples to calculate
-    *               features for.
-    * @return       Vector of extracted MFCC features.
-    **/
-    std::vector<float> MfccCompute(const std::vector<float>& audioData);
-
-    MfccParams _m_params;
-
-    /**
-     * @brief       Constructor
-     * @param[in]   params - MFCC parameters
-    */
-    MFCC(const MfccParams& params);
-
-    /* Delete the default constructor */
-    MFCC() = delete;
-
-    /** @brief  Default destructor */
-    ~MFCC() = default;
-
-    /** @brief  Initialise */
-    void Init();
-
-    /**
-     * @brief        Extract MFCC features and quantise for one single small
-     *               frame of audio data e.g. 640 samples.
-     * @param[in]    audioData - Vector of audio samples to calculate
-     *               features for.
-     * @param[in]    quantScale - quantisation scale.
-     * @param[in]    quantOffset - quantisation offset
-     * @return      Vector of extracted quantised MFCC features.
-     **/
-    template<typename T>
-    std::vector<T> MfccComputeQuant(const std::vector<float>& audioData,
-                                    const float quantScale,
-                                    const int quantOffset)
-    {
-        this->_MfccComputePreFeature(audioData);
-        float minVal = std::numeric_limits<T>::min();
-        float maxVal = std::numeric_limits<T>::max();
-
-        std::vector<T> mfccOut(this->_m_params.m_numMfccFeatures);
-        const size_t numFbankBins = this->_m_params.m_numFbankBins;
-
-        /* Take DCT. Uses matrix mul. */
-        for (size_t i = 0, j = 0; i < mfccOut.size(); ++i, j += numFbankBins)
-        {
-            float sum = 0;
-            for (size_t k = 0; k < numFbankBins; ++k)
-            {
-                sum += this->_m_dctMatrix[j + k] * this->_m_melEnergies[k];
-            }
-            /* Quantize to T. */
-            sum = std::round((sum / quantScale) + quantOffset);
-            mfccOut[i] = static_cast<T>(std::min<float>(std::max<float>(sum, minVal), maxVal));
-        }
-
-        return mfccOut;
-    }
-
-    /* Constants */
-    static constexpr float logStep = 1.8562979903656 / 27.0;
-    static constexpr float freqStep = 200.0 / 3;
-    static constexpr float minLogHz = 1000.0;
-    static constexpr float minLogMel = minLogHz / freqStep;
-
-protected:
-    /**
-     * @brief       Project input frequency to Mel Scale.
-     * @param[in]   freq - input frequency in floating point
-     * @param[in]   useHTKmethod - bool to signal if HTK method is to be
-     *              used for calculation
-     * @return      Mel transformed frequency in floating point
-     **/
-    static float MelScale(const float    freq,
-                          const bool     useHTKMethod = true);
-
-    /**
-     * @brief       Inverse Mel transform - convert MEL warped frequency
-     *              back to normal frequency
-     * @param[in]   freq - Mel frequency in floating point
-     * @param[in]   useHTKmethod - bool to signal if HTK method is to be
-     *              used for calculation
-     * @return      Real world frequency in floating point
-     **/
-    static float InverseMelScale(const float melFreq,
-                                 const bool  useHTKMethod = true);
-
-    /**
-     * @brief       Populates MEL energies after applying the MEL filter
-     *              bank weights and adding them up to be placed into
-     *              bins, according to the filter bank's first and last
-     *              indices (pre-computed for each filter bank element
-     *              by _CreateMelFilterBank function).
-     * @param[in]   fftVec                  Vector populated with FFT magnitudes
-     * @param[in]   melFilterBank           2D Vector with filter bank weights
-     * @param[in]   filterBankFilterFirst   Vector containing the first indices of filter bank
-     *                                      to be used for each bin.
-     * @param[in]   filterBankFilterLast    Vector containing the last indices of filter bank
-     *                                      to be used for each bin.
-     * @param[out]  melEnergies             Pre-allocated vector of MEL energies to be
-     *                                      populated.
-     * @return      true if successful, false otherwise
-     */
-    virtual bool ApplyMelFilterBank(
-            std::vector<float>&                 fftVec,
-            std::vector<std::vector<float>>&    melFilterBank,
-            std::vector<int32_t>&               filterBankFilterFirst,
-            std::vector<int32_t>&               filterBankFilterLast,
-            std::vector<float>&                 melEnergies);
-
-    /**
-     * @brief           Converts the Mel energies for logarithmic scale
-     * @param[in/out]   melEnergies - 1D vector of Mel energies
-     **/
-    virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
-
-    /**
-     * @brief       Create a matrix used to calculate Discrete Cosine
-     *              Transform.
-     * @param[in]   inputLength - input length of the buffer on which
-     *              DCT will be performed
-     * @param[in]   coefficientCount - Total coefficients per input
-     *              length
-     * @return      1D vector with inputLength x coefficientCount elements
-     *              populated with DCT coefficients.
-     */
-    virtual std::vector<float> CreateDCTMatrix(
-            const int32_t inputLength,
-            const int32_t coefficientCount);
-
-    /**
-     * @brief       Given the low and high Mel values, get the normaliser
-     *              for weights to be applied when populating the filter
-     *              bank.
-     * @param[in]   leftMel - low Mel frequency value
-     * @param[in]   rightMel - high Mel frequency value
-     * @param[in]   useHTKMethod - bool to signal if HTK method is to be
-     *              used for calculation
-     */
-    virtual float GetMelFilterBankNormaliser(
-            const float&   leftMel,
-            const float&   rightMel,
-            const bool     useHTKMethod);
-
-private:
-
-    std::vector<float>              _m_frame;
-    std::vector<float>              _m_buffer;
-    std::vector<float>              _m_melEnergies;
-    std::vector<float>              _m_windowFunc;
-    std::vector<std::vector<float>> _m_melFilterBank;
-    std::vector<float>              _m_dctMatrix;
-    std::vector<int32_t>            _m_filterBankFilterFirst;
-    std::vector<int32_t>            _m_filterBankFilterLast;
-    bool                            _m_filterBankInitialised;
-
-    /**
-     * @brief       Initialises the filter banks and the DCT matrix **/
-    void _InitMelFilterBank();
-
-    /**
-     * @brief       Signals whether the instance of MFCC has had its
-     *              required buffers initialised
-     * @return      True if initialised, false otherwise
-     **/
-    bool _IsMelFilterBankInited();
-
-    /**
-     * @brief       Create mel filter banks for MFCC calculation.
-     * @return      2D vector of floats
-     **/
-    std::vector<std::vector<float>> _CreateMelFilterBank();
-
-    /**
-     * @brief       Computes and populates internal memeber buffers used
-     *              in MFCC feature calculation
-     * @param[in]   audioData - 1D vector of 16-bit audio data
-     */
-    void _MfccComputePreFeature(const std::vector<float>& audioData);
-
-    /** @brief       Computes the magnitude from an interleaved complex array */
-    void _ConvertToPowerSpectrum();
-
-};
-
diff --git a/samples/SpeechRecognition/include/MathUtils.hpp b/samples/SpeechRecognition/include/MathUtils.hpp
deleted file mode 100644
index 5f81fb6507..0000000000
--- a/samples/SpeechRecognition/include/MathUtils.hpp
+++ /dev/null
@@ -1,85 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include <vector>
-#include <cmath>
-#include <cstdint>
-#include <numeric>
-
-class MathUtils
-{
-
-public:
-
-    /**
-     * @brief       Computes the FFT for the input vector
-     * @param[in]   input       Floating point vector of input elements
-     * @param[out]  fftOutput   Output buffer to be populated by computed
-     *                          FFTs
-     * @return      none
-     */
-    static void FftF32(std::vector<float>& input,
-                       std::vector<float>& fftOutput);
-
-
-    /**
-     * @brief       Computes the dot product of two 1D floating point
-     *              vectors.
-     *              result = sum(srcA[0]*srcB[0] + srcA[1]*srcB[1] + ..)
-     * @param[in]   srcPtrA     pointer to the first element of first
-     *                          array
-     * @param[in]   srcPtrB     pointer to the first element of second
-     *                          array
-     * @param[in]   srcLen      Number of elements in the array/vector
-     * @return      dot product
-     */
-    static float DotProductF32(float* srcPtrA, float* srcPtrB,
-                               const int srcLen);
-
-    /**
-     * @brief       Computes the squared magnitude of floating point
-     *              complex number array.
-     * @param[in]   ptrSrc      pointer to the first element of input
-     *                          array
-     * @param[in]   srcLen      Number of elements in the array/vector
-     * @param[out]  ptrDst      Output buffer to be populated
-     * @param[in]   dstLen      output buffer len (for sanity check only)
-     * @return      true if successful, false otherwise
-     */
-    static bool ComplexMagnitudeSquaredF32(float* ptrSrc,
-                                           const int srcLen,
-                                           float* ptrDst,
-                                           const int dstLen);
-
-    /**
-         * @brief       Computes the natural logarithms of input floating point
-         *              vector
-         * @param[in]   input   Floating point input vector
-         * @param[out]  output  Pre-allocated buffer to be populated with
-         *                      natural log values of each input element
-         * @return      none
-         */
-    static void VecLogarithmF32(std::vector <float>& input,
-                                std::vector <float>& output);
-
-    /**
-         * @brief       Gets the mean of a floating point array of elements
-         * @param[in]   ptrSrc  pointer to the first element
-         * @param[in]   srcLen  Number of elements in the array/vector
-         * @return      average value
-         */
-    static float MeanF32(float* ptrSrc, const uint32_t srcLen);
-
-    /**
-     * @brief       Gets the standard deviation of a floating point array
-     *              of elements
-     * @param[in]   ptrSrc  pointer to the first element
-     * @param[in]   srcLen  Number of elements in the array/vector
-     * @param[in]   mean    pre-computed mean value
-     * @return      standard deviation value
-     */
-    static float StdDevF32(float* ptrSrc, const uint32_t srcLen,
-                           const float mean);
-};
diff --git a/samples/SpeechRecognition/include/SlidingWindow.hpp b/samples/SpeechRecognition/include/SlidingWindow.hpp
deleted file mode 100644
index 791a0b7fc0..0000000000
--- a/samples/SpeechRecognition/include/SlidingWindow.hpp
+++ /dev/null
@@ -1,161 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-template<class T>
-class SlidingWindow
-{
-protected:
-    T* m_start = nullptr;
-    size_t m_dataSize = 0;
-    size_t m_size = 0;
-    size_t m_stride = 0;
-    size_t m_count = 0;
-public:
-
-    /**
-     * Creates the window slider through the given data.
-     *
-     * @param data          pointer to the data to slide through.
-     * @param dataSize      size in T type elements wise.
-     * @param windowSize    sliding window size in T type wise elements.
-     * @param stride        stride size in T type wise elements.
-     */
-    SlidingWindow(T* data, size_t dataSize,
-                  size_t windowSize, size_t stride)
-    {
-        m_start = data;
-        m_dataSize = dataSize;
-        m_size = windowSize;
-        m_stride = stride;
-    }
-
-    SlidingWindow() = default;
-
-    ~SlidingWindow() = default;
-
-    /**
-     * Get the next data window.
-     * @return pointer to the next window, if next window is not available nullptr is returned.
-     */
-    virtual T* Next()
-    {
-        if (HasNext())
-        {
-            m_count++;
-            return m_start + Index() * m_stride;
-        }
-        else
-        {
-            return nullptr;
-        }
-    }
-
-    /**
-     * Checks if the next data portion is available.
-     * @return true if next data portion is available
-     */
-    bool HasNext()
-    {
-        return this->m_count < 1 + this->FractionalTotalStrides() && (this->NextWindowStartIndex() < this->m_dataSize);
-    }
-
-    /**
-     * Resest the slider to the initial position.
-     */
-    virtual void Reset()
-    {
-        m_count = 0;
-    }
-
-    /**
-     * Resest the slider to the initial position.
-     */
-    virtual size_t GetWindowSize()
-    {
-        return m_size;
-    }
-
-    /**
-     * Resets the slider to the start of the new data.
-     * New data size MUST be the same as the old one.
-     * @param newStart pointer to the new data to slide through.
-     */
-    virtual void Reset(T* newStart)
-    {
-        m_start = newStart;
-        Reset();
-    }
-
-    /**
-     * Gets current index of the sliding window.
-     * @return current position of the sliding window in number of strides
-     */
-    size_t Index()
-    {
-        return m_count == 0? 0: m_count - 1;
-    }
-
-    /**
-     * Gets the index from the start of the data where the next window will begin.
-     * While Index() returns the index of sliding window itself this function returns the index of the data
-     * element itself.
-     * @return Index from the start of the data where the next sliding window will begin.
-     */
-    virtual size_t NextWindowStartIndex()
-    {
-        return m_count == 0? 0: ((m_count) * m_stride);
-    }
-
-    /**
-     * Go to given sliding window index.
-     * @param index new position of the sliding window. if index is invalid (greater than possible range of strides)
-     *              then next call to Next() will return nullptr.
-     */
-    void FastForward(size_t index)
-    {
-        m_count = index;
-    }
-
-    /**
-     * Calculates whole number of times the window can stride through the given data.
-     * @return maximum number of strides.
-     */
-    size_t TotalStrides()
-    {
-        if (m_size > m_dataSize)
-        {
-            return 0;
-        }
-        return ((m_dataSize - m_size)/m_stride);
-    }
-
-    /**
-     * Calculates number of times the window can stride through the given data. May not be a whole number.
-     * @return Number of strides to cover all data.
-     */
-    float FractionalTotalStrides()
-    {
-        if(this->m_size > this->m_dataSize)
-        {
-            return this->m_dataSize / this->m_size;
-        }
-        else
-        {
-            return ((this->m_dataSize - this->m_size)/ static_cast<float>(this->m_stride));
-        }
-
-    }
-
-    /**
-     * Calculates the remaining data left to be processed
-     * @return The remaining unprocessed data
-     */
-    int RemainingData()
-    {
-        return this->m_dataSize - this->NextWindowStartIndex();
-    }
-};
-\ No newline at end of file
diff --git a/samples/SpeechRecognition/include/SpeechRecognitionPipeline.hpp b/samples/SpeechRecognition/include/SpeechRecognitionPipeline.hpp
index 47ce30416f..bc3fbfe151 100644
--- a/samples/SpeechRecognition/include/SpeechRecognitionPipeline.hpp
+++ b/samples/SpeechRecognition/include/SpeechRecognitionPipeline.hpp
@@ -8,16 +8,16 @@
 #include "ArmnnNetworkExecutor.hpp"
 #include "Decoder.hpp"
 #include "MFCC.hpp"
-#include "Preprocess.hpp"
+#include "Wav2LetterPreprocessor.hpp"
 
-namespace asr
+namespace asr 
 {
 /**
  * Generic Speech Recognition pipeline with 3 steps: data pre-processing, inference execution and inference
  * result post-processing.
  *
  */
-class ASRPipeline
+class ASRPipeline 
 {
 public:
 
@@ -27,7 +27,7 @@ public:
      * @param decoder - unique pointer to inference results decoder
      */
     ASRPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> executor,
-                std::unique_ptr<Decoder> decoder);
+                std::unique_ptr<Decoder> decoder, std::unique_ptr<Wav2LetterPreprocessor> preprocessor);
 
     /**
      * @brief Standard audio pre-processing implementation.
@@ -36,20 +36,16 @@ public:
      * extracting the MFCC features.
 
      * @param[in] audio - the raw audio data
-     * @param[out] preprocessor - the preprocessor object, which handles the data prepreration
+     * @return the pre-processed audio data as int8_t values (the preprocessor is now owned by the pipeline)
      */
-    template<typename Tin,typename Tout>
-    std::vector<Tout> PreProcessing(std::vector<Tin>& audio, Preprocess& preprocessor)
-    {
-        int audioDataToPreProcess = preprocessor._m_windowLen +
-                ((preprocessor._m_mfcc._m_params.m_numMfccVectors -1) *preprocessor._m_windowStride);
-        int outputBufferSize = preprocessor._m_mfcc._m_params.m_numMfccVectors
-                * preprocessor._m_mfcc._m_params.m_numMfccFeatures * 3;
-        std::vector<Tout> outputBuffer(outputBufferSize);
-        preprocessor.Invoke(audio.data(), audioDataToPreProcess, outputBuffer, m_executor->GetQuantizationOffset(),
-                            m_executor->GetQuantizationScale());
-        return outputBuffer;
-    }
+    std::vector<int8_t> PreProcessing(std::vector<float>& audio);
+
+    int getInputSamplesSize();
+    int getSlidingWindowOffset();
+
+    // This value is exposed because it can only be derived from knowledge of the model, not from the model itself.
+    // TODO: refactor so that hard-coded values are not defined outside of the model settings.
+    int SLIDING_WINDOW_OFFSET;
 
     /**
      * @brief Executes inference
@@ -60,9 +56,9 @@ public:
      * @param[out] result - raw inference results.
      */
     template<typename T>
-    void Inference(const std::vector<T>& preprocessedData, common::InferenceResults<int8_t>& result)
+    void Inference(const std::vector<T>& preprocessedData, common::InferenceResults<int8_t>& result) 
     {
-        size_t data_bytes = sizeof(std::vector<T>) + (sizeof(T) * preprocessedData.size());
+        size_t data_bytes = sizeof(T) * preprocessedData.size();
         m_executor->Run(preprocessedData.data(), data_bytes, result);
     }
 
@@ -78,9 +74,9 @@ public:
      */
     template<typename T>
     void PostProcessing(common::InferenceResults<int8_t>& inferenceResult,
-                                     bool& isFirstWindow,
-                                     bool isLastWindow,
-                                     std::string currentRContext)
+                        bool& isFirstWindow,
+                        bool isLastWindow,
+                        std::string currentRContext) 
     {
         int rowLength = 29;
         int middleContextStart = 49;
@@ -92,17 +88,17 @@ public:
         std::vector<T> contextToProcess;
 
         // If isFirstWindow we keep the left context of the output
-        if(isFirstWindow)
+        if (isFirstWindow) 
         {
             std::vector<T> chunk(&inferenceResult[0][leftContextStart],
-                    &inferenceResult[0][middleContextEnd * rowLength]);
+                                 &inferenceResult[0][middleContextEnd * rowLength]);
             contextToProcess = chunk;
         }
-        // Else we only keep the middle context of the output
-        else
+        else 
         {
+            // Else we only keep the middle context of the output
             std::vector<T> chunk(&inferenceResult[0][middleContextStart * rowLength],
-                    &inferenceResult[0][middleContextEnd * rowLength]);
+                                 &inferenceResult[0][middleContextEnd * rowLength]);
             contextToProcess = chunk;
         }
         std::string output = this->m_decoder->DecodeOutput<T>(contextToProcess);
@@ -110,10 +106,10 @@ public:
         std::cout << output << std::flush;
 
         // If this is the last window, we print the right context of the output
-        if(isLastWindow)
+        if (isLastWindow) 
         {
-            std::vector<T> rContext(&inferenceResult[0][rightContextStart*rowLength],
-                    &inferenceResult[0][rightContextEnd * rowLength]);
+            std::vector<T> rContext(&inferenceResult[0][rightContextStart * rowLength],
+                                    &inferenceResult[0][rightContextEnd * rowLength]);
             currentRContext = this->m_decoder->DecodeOutput(rContext);
             std::cout << currentRContext << std::endl;
         }
@@ -122,6 +118,7 @@ public:
 protected:
     std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> m_executor;
     std::unique_ptr<Decoder> m_decoder;
+    std::unique_ptr<Wav2LetterPreprocessor> m_preProcessor;
 };
 
 using IPipelinePtr = std::unique_ptr<asr::ASRPipeline>;
@@ -136,4 +133,4 @@ using IPipelinePtr = std::unique_ptr<asr::ASRPipeline>;
  */
 IPipelinePtr CreatePipeline(common::PipelineOptions& config, std::map<int, std::string>& labels);
 
-}// namespace asr
-\ No newline at end of file
+} // namespace asr
+\ No newline at end of file
diff --git a/samples/SpeechRecognition/include/Wav2LetterMFCC.hpp b/samples/SpeechRecognition/include/Wav2LetterMFCC.hpp
new file mode 100644
index 0000000000..aa88aafb3b
--- /dev/null
+++ b/samples/SpeechRecognition/include/Wav2LetterMFCC.hpp
@@ -0,0 +1,78 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "MFCC.hpp"
+
+/* Provides the Wav2Letter-specific MFCC calculation by overriding selected steps of the MFCC base class. */
+class Wav2LetterMFCC : public MFCC 
+{
+
+public:
+    explicit Wav2LetterMFCC(const MfccParams& params)
+        :  MFCC(params)  // this class declares no data members; the parameters are simply forwarded to MFCC
+    {}
+
+    Wav2LetterMFCC()  = delete;   // an instance can only be created from a set of MFCC parameters
+    ~Wav2LetterMFCC() = default;
+
+protected:
+
+    /**
+     * @brief       Overrides the base class routine that populates the mel energies from the FFT magnitudes.
+     * @param[in]   fftVec                  Vector populated with FFT magnitudes
+     * @param[in]   melFilterBank           2D Vector with filter bank weights
+     * @param[in]   filterBankFilterFirst   Vector containing the first indices of filter bank
+     *                                      to be used for each bin.
+     * @param[in]   filterBankFilterLast    Vector containing the last indices of filter bank
+     *                                      to be used for each bin.
+     * @param[out]  melEnergies             Pre-allocated vector of MEL energies to be
+     *                                      populated.
+     * @return      true if successful, false otherwise
+     */
+    bool ApplyMelFilterBank(
+        std::vector<float>&                 fftVec,
+        std::vector<std::vector<float>>&    melFilterBank,
+        std::vector<uint32_t>&              filterBankFilterFirst,
+        std::vector<uint32_t>&              filterBankFilterLast,
+        std::vector<float>&                 melEnergies) override;
+
+    /**
+     * @brief           Override for the base class implementation convert mel
+     *                  energies to logarithmic scale. The difference from
+     *                  default behaviour is that the power is converted to dB
+     *                  and subsequently clamped.
+     * @param[in,out]   melEnergies   1D vector of Mel energies
+     **/
+    void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override;
+
+    /**
+     * @brief       Create a matrix used to calculate Discrete Cosine
+     *              Transform. Override for the base class' default
+     *              implementation as the first and last elements
+     *              use a different normaliser.
+     * @param[in]   inputLength        input length of the buffer on which
+     *                                 DCT will be performed
+     * @param[in]   coefficientCount   Total coefficients per input length.
+     * @return      1D vector with inputLength x coefficientCount elements
+     *              populated with DCT coefficients.
+     */
+    std::vector<float> CreateDCTMatrix(int32_t inputLength,
+                                       int32_t coefficientCount) override;
+
+    /**
+     * @brief       Given the low and high Mel values, get the normaliser
+     *              for weights to be applied when populating the filter
+     *              bank. Override for the base class implementation.
+     * @param[in]   leftMel        Low Mel frequency value.
+     * @param[in]   rightMel       High Mel frequency value.
+     * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+     *                             used for calculation.
+     * @return      Value to use for normalising.
+     */
+    float GetMelFilterBankNormaliser(const float&   leftMel,
+                                     const float&   rightMel,
+                                     bool     useHTKMethod) override;
+};
+\ No newline at end of file
diff --git a/samples/SpeechRecognition/include/Preprocess.hpp b/samples/SpeechRecognition/include/Wav2LetterPreprocessor.hpp
index 80c568439b..ebc9e864e3 100644
--- a/samples/SpeechRecognition/include/Preprocess.hpp
+++ b/samples/SpeechRecognition/include/Wav2LetterPreprocessor.hpp
@@ -1,48 +1,23 @@
 //
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+#ifndef SPEECH_RECOGNITION_EXAMPLE_WAV2LETTERPREPROCESSOR_HPP
+#define SPEECH_RECOGNITION_EXAMPLE_WAV2LETTERPREPROCESSOR_HPP
 
-#pragma once
-
+#include <numeric>
 #include "DataStructures.hpp"
 #include "SlidingWindow.hpp"
-#include <numeric>
 #include "MFCC.hpp"
+#include "Wav2LetterMFCC.hpp"
+// Audio sliding-window type used by Wav2LetterPreprocessor, the pre-processing class for the Wav2Letter ASR model
+using AudioWindow = SlidingWindow<const float>;
 
-/* Class to facilitate pre-processing calculation for Wav2Letter model
-     * for ASR */
-using AudioWindow = SlidingWindow <const float>;
-
-class Preprocess
+class Wav2LetterPreprocessor 
 {
 public:
-
-    MFCC                _m_mfcc;            /* MFCC instance */
-
-    /* Actual buffers to be populated */
-    Array2d<float>      _m_mfccBuf;         /* Contiguous buffer 1D: MFCC */
-    Array2d<float>      _m_delta1Buf;       /* Contiguous buffer 1D: Delta 1 */
-    Array2d<float>      _m_delta2Buf;       /* Contiguous buffer 1D: Delta 2 */
-
-    uint32_t            _m_windowLen;       /* Window length for MFCC */
-    uint32_t            _m_windowStride;    /* Window stride len for MFCC */
-    AudioWindow         _m_window;          /* Sliding window */
-
-    /**
-     * @brief       Constructor
-     * @param[in]   numMfccFeatures     number of MFCC features per window
-     * @param[in]   windowLen           number of elements in a window
-     * @param[in]   windowStride        stride (in number of elements) for
-     *                                  moving the window
-     * @param[in]   numMfccVectors      number of MFCC vectors per window
-    */
-    Preprocess(
-            const uint32_t  windowLen,
-            const uint32_t  windowStride,
-            const MFCC mfccInst);
-    Preprocess() = delete;
-    ~Preprocess();
+    Wav2LetterPreprocessor(uint32_t windowLen, uint32_t windowStride,
+                           std::unique_ptr<Wav2LetterMFCC> mfccInst);
 
     /**
      * @brief       Calculates the features required from audio data. This
@@ -55,12 +30,19 @@ public:
      * @param[in]   tensor        tensor to be populated
      * @return      true if successful, false in case of error.
      */
-    bool Invoke(const float* audioData,
-                const uint32_t  audioDataLen,
-                std::vector<int8_t>& output,
-                int quantOffset,
+    bool Invoke(const float* audioData, uint32_t audioDataLen, std::vector<int8_t>& output, int quantOffset,
                 float quantScale);
 
+    std::unique_ptr<MFCC> m_mfcc;
+
+    // Actual buffers to be populated 
+    Array2d<float> m_mfccBuf;         // Contiguous buffer 1D: MFCC 
+    Array2d<float> m_delta1Buf;       // Contiguous buffer 1D: Delta 1 
+    Array2d<float> m_delta2Buf;       // Contiguous buffer 1D: Delta 2
+
+    uint32_t m_windowLen;       // Window length for MFCC 
+    uint32_t m_windowStride;    // Window stride len for MFCC 
+    AudioWindow m_window;       // Sliding window 
 
 protected:
     /**
@@ -73,16 +55,18 @@ protected:
      *
      * @return true if successful, false otherwise
      */
-    static bool _ComputeDeltas(Array2d<float>& mfcc,
-                               Array2d<float>& delta1,
-                               Array2d<float>& delta2);
+    static bool ComputeDeltas(Array2d<float>& mfcc,
+                              Array2d<float>& delta1,
+                              Array2d<float>& delta2);
+
+protected:
 
     /**
      * @brief      Given a 2D vector of floats, computes the mean
      * @param[in]   vec      vector of vector of floats
      * @return      mean value
      */
-    static float _GetMean(Array2d<float>& vec);
+    static float GetMean(Array2d<float>& vec);
 
     /**
      * @brief       Given a 2D vector of floats, computes the stddev
@@ -90,8 +74,7 @@ protected:
      * @param[in]   mean     mean value of the vector passed in
      * @return      stddev value
      */
-    static float _GetStdDev(Array2d<float>& vec,
-                            const float mean);
+    static float GetStdDev(Array2d<float>& vec, float mean);
 
     /**
      * @brief           Given a 2D vector of floats, normalises it using
@@ -99,13 +82,13 @@ protected:
      * @param[in/out]   vec      vector of vector of floats
      * @return
      */
-    static void _NormaliseVec(Array2d<float>& vec);
+    static void NormaliseVec(Array2d<float>& vec);
 
     /**
      * @brief       Normalises the MFCC and delta buffers
      * @return
      */
-    void _Normalise();
+    void Normalise();
 
     /**
      * @brief       Given the quantisation and data type limits, computes
@@ -117,12 +100,12 @@ protected:
      * @param[in]   maxVal          Numerical limit - maximum
      * @return      floating point quantised value
      */
-    static float _GetQuantElem(
-            const float     elem,
-            const float     quantScale,
-            const int       quantOffset,
-            const float     minVal,
-            const float     maxVal);
+    static float GetQuantElem(
+            float elem,
+            float quantScale,
+            int quantOffset,
+            float minVal,
+            float maxVal);
 
     /**
      * @brief       Quantises the MFCC and delta buffers, and places them
@@ -137,39 +120,39 @@ protected:
      * @param[in]   quantScale      quantisation scale
      * @param[in]   quantOffset     quantisation offset
      */
-    template <typename T>
-    bool _Quantise(T* outputBuf, int quantOffset, float quantScale)
+    template<typename T>
+    bool Quantise(T*outputBuf, int quantOffset, float quantScale) 
     {
-        /* Populate */
+        // Populate 
         T* outputBufMfcc = outputBuf;
-        T* outputBufD1 = outputBuf + this->_m_mfcc._m_params.m_numMfccFeatures;
-        T* outputBufD2 = outputBufD1 + this->_m_mfcc._m_params.m_numMfccFeatures;
-        const uint32_t ptrIncr = this->_m_mfcc._m_params.m_numMfccFeatures * 2; /* (3 vectors - 1 vector) */
+        T* outputBufD1 = outputBuf + this->m_mfcc->m_params.m_numMfccFeatures;
+        T* outputBufD2 = outputBufD1 + this->m_mfcc->m_params.m_numMfccFeatures;
+        const uint32_t ptrIncr = this->m_mfcc->m_params.m_numMfccFeatures * 2; // (3 vectors - 1 vector) 
 
         const float minVal = std::numeric_limits<T>::min();
         const float maxVal = std::numeric_limits<T>::max();
 
-        /* We need to do a transpose while copying and concatenating
-         * the tensor*/
-        for (uint32_t j = 0; j < this->_m_mfcc._m_params.m_numMfccVectors; ++j) {
-            for (uint32_t i = 0; i < this->_m_mfcc._m_params.m_numMfccFeatures; ++i)
+        // We need to do a transpose while copying and concatenating the tensor
+        for (uint32_t j = 0; j < this->m_mfcc->m_params.m_numMfccVectors; ++j) 
+        {
+            for (uint32_t i = 0; i < this->m_mfcc->m_params.m_numMfccFeatures; ++i) 
             {
-                *outputBufMfcc++ = static_cast<T>(this->_GetQuantElem(
-                        this->_m_mfccBuf(i, j), quantScale,
+                *outputBufMfcc++ = static_cast<T>(Wav2LetterPreprocessor::GetQuantElem(
+                        this->m_mfccBuf(i, j), quantScale,
                         quantOffset, minVal, maxVal));
-                *outputBufD1++ = static_cast<T>(this->_GetQuantElem(
-                        this->_m_delta1Buf(i, j), quantScale,
+                *outputBufD1++ = static_cast<T>(Wav2LetterPreprocessor::GetQuantElem(
+                        this->m_delta1Buf(i, j), quantScale,
                         quantOffset, minVal, maxVal));
-                *outputBufD2++ = static_cast<T>(this->_GetQuantElem(
-                        this->_m_delta2Buf(i, j), quantScale,
+                *outputBufD2++ = static_cast<T>(Wav2LetterPreprocessor::GetQuantElem(
+                        this->m_delta2Buf(i, j), quantScale,
                         quantOffset, minVal, maxVal));
             }
             outputBufMfcc += ptrIncr;
             outputBufD1 += ptrIncr;
             outputBufD2 += ptrIncr;
         }
-
         return true;
     }
 };
 
+#endif //SPEECH_RECOGNITION_EXAMPLE_WAV2LETTERPREPROCESSOR_HPP