diff options
Diffstat (limited to 'source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp')
-rw-r--r-- | source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp | 341 |
1 files changed, 0 insertions, 341 deletions
diff --git a/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp b/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp deleted file mode 100644 index cbf0e4e..0000000 --- a/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (c) 2021-2022 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef RNNOISE_FEATURE_PROCESSOR_HPP -#define RNNOISE_FEATURE_PROCESSOR_HPP - -#include "PlatformMath.hpp" -#include <cstdint> -#include <vector> -#include <array> -#include <tuple> - -namespace arm { -namespace app { -namespace rnn { - - using vec1D32F = std::vector<float>; - using vec2D32F = std::vector<vec1D32F>; - using arrHp = std::array<float, 2>; - using math::FftInstance; - using math::FftType; - - class FrameFeatures { - public: - bool m_silence{false}; /* If frame contains silence or not. */ - vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */ - vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers (presumably the spectrum of audio x; TODO confirm). */ - vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers (presumably the spectrum of pitch p; TODO confirm). */ - vec1D32F m_Ex{}; /* Spectral band energy for audio x. */ - vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */ - vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. 
*/ - }; - - /** - * @brief RNNoise pre and post processing class based on the 2018 paper from - * Jean-Marc Valin. Recommended reading: - * - https://jmvalin.ca/demo/rnnoise/ - * - https://arxiv.org/abs/1709.08243 - **/ - class RNNoiseFeatureProcessor { - /* Public interface */ - public: - RNNoiseFeatureProcessor(); - ~RNNoiseFeatureProcessor() = default; - - /** - * @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model. - * @param[in] audioData Pointer to the floating point vector - * with audio data (within the numerical - * limits of int16_t type). - * @param[in] audioLen Number of elements in the audio window. - * @param[out] features FrameFeatures object reference. - **/ - void PreprocessFrame(const float* audioData, - size_t audioLen, - FrameFeatures& features); - - /** - * @brief Use the RNNoise model output gain values with pre-processing features - * to generate audio with noise suppressed. - * @param[in] modelOutput Output gain values from model. - * @param[in] features Calculated features from pre-processing step. - * @param[out] outFrame Output frame to be populated. 
- **/ - void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame); - - - /* Public constants */ - public: - static constexpr uint32_t FRAME_SIZE_SHIFT{2}; - static constexpr uint32_t FRAME_SIZE{512}; - static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE}; - static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1}; - - static constexpr uint32_t PITCH_MIN_PERIOD{64}; - static constexpr uint32_t PITCH_MAX_PERIOD{820}; - static constexpr uint32_t PITCH_FRAME_SIZE{1024}; - static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE}; - - static constexpr uint32_t NB_BANDS{22}; - static constexpr uint32_t CEPS_MEM{8}; - static constexpr uint32_t NB_DELTA_CEPS{6}; - - static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2}; - - /* Private functions */ - private: - - /** - * @brief Initialises the half window and DCT tables. - */ - void InitTables(); - - /** - * @brief Applies a bi-quadratic filter over the audio window. - * @param[in] bHp Constant coefficient set b (arrHp type). - * @param[in] aHp Constant coefficient set a (arrHp type). - * @param[in,out] memHpX Coefficients populated by this function. - * @param[in,out] audioWindow Floating point vector with audio data. - **/ - void BiQuad( - const arrHp& bHp, - const arrHp& aHp, - arrHp& memHpX, - vec1D32F& audioWindow); - - /** - * @brief Computes features from the "filtered" audio window. - * @param[in] audioWindow Floating point vector with audio data. - * @param[out] features FrameFeatures object reference. - **/ - void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features); - - /** - * @brief Runs analysis on the audio buffer. - * @param[in] audioWindow Floating point vector with audio data. - * @param[out] fft Floating point FFT vector containing real and - * imaginary pairs of elements. NOTE: this vector - * does not contain the mirror image (conjugates) - * part of the spectrum. 
- * @param[out] energy Computed energy for each band in the Bark scale. - * @param[out] analysisMem Buffer sequentially, but partially, - * populated with new audio data. - **/ - void FrameAnalysis( - const vec1D32F& audioWindow, - vec1D32F& fft, - vec1D32F& energy, - vec1D32F& analysisMem); - - /** - * @brief Applies the window function, in-place, over the given - * floating point buffer. - * @param[in,out] x Buffer the window will be applied to. - **/ - void ApplyWindow(vec1D32F& x); - - /** - * @brief Computes the FFT for a given vector. - * @param[in] x Vector to compute the FFT from. - * @param[out] fft Floating point FFT vector containing real and - * imaginary pairs of elements. NOTE: this vector - * does not contain the mirror image (conjugates) - * part of the spectrum. - **/ - void ForwardTransform( - vec1D32F& x, - vec1D32F& fft); - - /** - * @brief Computes band energy for each of the 22 Bark scale bands. - * @param[in] fft_X FFT spectrum (as computed by ForwardTransform). - * @param[out] bandE Vector with 22 elements populated with energy for - * each band. - **/ - void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE); - - /** - * @brief Computes band energy correlation. - * @param[in] X FFT vector X. - * @param[in] P FFT vector P. - * @param[out] bandC Vector with 22 elements populated with band energy - * correlation for the two input FFT vectors. - **/ - void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC); - - /** - * @brief Performs pitch auto-correlation for a given vector for - * given lag. - * @param[in] x Input vector. - * @param[out] ac Auto-correlation output vector. - * @param[in] lag Lag value. - * @param[in] n Number of elements to consider for correlation - * computation. - **/ - void AutoCorr(const vec1D32F &x, - vec1D32F &ac, - size_t lag, - size_t n); - - /** - * @brief Computes pitch cross-correlation. - * @param[in] x Input vector 1. - * @param[in] y Input vector 2. 
- * @param[out] xCorr Cross-correlation output vector. - * @param[in] len Number of elements to consider for correlation - * computation. - * @param[in] maxPitch Maximum pitch. - **/ - void PitchXCorr( - const vec1D32F& x, - const vec1D32F& y, - vec1D32F& xCorr, - size_t len, - size_t maxPitch); - - /** - * @brief Computes "Linear Predictor Coefficients". - * @param[in] ac Correlation vector. - * @param[in] p Number of elements of input vector to consider. - * @param[out] lpc Output coefficients vector. - **/ - void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc); - - /** - * @brief Custom FIR implementation. - * @param[in] num FIR coefficient vector. - * @param[in] N Number of elements. - * @param[out] x Vector to be processed. - **/ - void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x); - - /** - * @brief Down-sample the pitch buffer. - * @param[in,out] pitchBuf Pitch buffer. - * @param[in] pitchBufSz Buffer size. - **/ - void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz); - - /** - * @brief Pitch search function. - * @param[in] xLp Shifted pitch buffer input. - * @param[in] y Pitch buffer input. - * @param[in] len Length to search for. - * @param[in] maxPitch Maximum pitch. - * @return pitch index. - **/ - int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch); - - /** - * @brief Finds the "best" pitch from the buffer. - * @param[in] xCorr Pitch correlation vector. - * @param[in] y Pitch buffer input. - * @param[in] len Length to search for. - * @param[in] maxPitch Maximum pitch. - * @return pitch array (2 elements). - **/ - arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch); - - /** - * @brief Remove pitch period doubling errors. - * @param[in,out] pitchBuf Pitch buffer vector. - * @param[in] maxPeriod Maximum period. - * @param[in] minPeriod Minimum period. - * @param[in] frameSize Frame size. - * @param[in] pitchIdx0_ Pitch index 0. - * @return pitch index. 
- **/ - int RemoveDoubling( - vec1D32F& pitchBuf, - uint32_t maxPeriod, - uint32_t minPeriod, - uint32_t frameSize, - size_t pitchIdx0_); - - /** - * @brief Computes pitch gain. - * @param[in] xy Single xy cross correlation value. - * @param[in] xx Single xx auto correlation value. - * @param[in] yy Single yy auto correlation value. - * @return Calculated pitch gain. - **/ - float ComputePitchGain(float xy, float xx, float yy); - - /** - * @brief Computes DCT vector from the given input. - * @param[in] input Input vector. - * @param[out] output Output vector with DCT coefficients. - **/ - void DCT(vec1D32F& input, vec1D32F& output); - - /** - * @brief Perform inverse Fourier transform on complex spectral vector. - * @param[out] out Output vector. - * @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved. - **/ - void InverseTransform(vec1D32F& out, vec1D32F& fftXIn); - - /** - * @brief Perform pitch filtering. - * @param[in] features Object with pre-processing calculated frame features. - * @param[in] g Gain values. - **/ - void PitchFilter(FrameFeatures& features, vec1D32F& g); - - /** - * @brief Interpolate the band gain values. - * @param[out] g Gain values. - * @param[in] bandE Vector with 22 elements populated with energy for - * each band. - **/ - void InterpBandGain(vec1D32F& g, vec1D32F& bandE); - - /** - * @brief Create de-noised frame. - * @param[out] outFrame Output vector for storing the created audio frame. - * @param[in] fftY Gain adjusted complex spectral vector. 
- */ - void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY); - - /* Private objects */ - private: - FftInstance m_fftInstReal; /* FFT instance for real numbers */ - FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */ - vec1D32F m_halfWindow; /* Window coefficients */ - vec1D32F m_dctTable; /* DCT table */ - vec1D32F m_analysisMem; /* Buffer used for frame analysis */ - vec2D32F m_cepstralMem; /* Cepstral coefficients */ - size_t m_memId; /* memory ID */ - vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */ - vec1D32F m_pitchBuf; /* Pitch buffer */ - float m_lastGain; /* Last gain calculated */ - int m_lastPeriod; /* Last period calculated */ - arrHp m_memHpX; /* HpX coefficients. */ - vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */ - - /* Constants */ - const std::array <uint32_t, NB_BANDS> m_eband5ms { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, - 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100}; - }; - - -} /* namespace rnn */ -} /* namespace app */ -} /* namespace arm */ - -#endif /* RNNOISE_FEATURE_PROCESSOR_HPP */ |