summaryrefslogtreecommitdiff
path: root/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp')
-rw-r--r--source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp341
1 files changed, 341 insertions, 0 deletions
diff --git a/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp b/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
new file mode 100644
index 0000000..cbf0e4e
--- /dev/null
+++ b/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RNNOISE_FEATURE_PROCESSOR_HPP
+#define RNNOISE_FEATURE_PROCESSOR_HPP
+
+#include "PlatformMath.hpp"
+#include <cstdint>
+#include <vector>
+#include <array>
+#include <tuple>
+
+namespace arm {
+namespace app {
+namespace rnn {
+
+ using vec1D32F = std::vector<float>;
+ using vec2D32F = std::vector<vec1D32F>;
+ using arrHp = std::array<float, 2>;
+ using math::FftInstance;
+ using math::FftType;
+
+ class FrameFeatures {
+ public:
+ bool m_silence{false}; /* If frame contains silence or not. */
+ vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */
+ vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers. */
+ vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers. */
+ vec1D32F m_Ex{}; /* Spectral band energy for audio x. */
+ vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */
+ vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. */
+ };
+
+ /**
+ * @brief RNNoise pre and post processing class based on the 2018 paper from
+ * Jan-Marc Valin. Recommended reading:
+ * - https://jmvalin.ca/demo/rnnoise/
+ * - https://arxiv.org/abs/1709.08243
+ **/
+ class RNNoiseFeatureProcessor {
+ /* Public interface */
+ public:
+ RNNoiseFeatureProcessor();
+ ~RNNoiseFeatureProcessor() = default;
+
+ /**
+ * @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model.
+ * @param[in] audioData Pointer to the floating point vector
+ * with audio data (within the numerical
+ * limits of int16_t type).
+ * @param[in] audioLen Number of elements in the audio window.
+ * @param[out] features FrameFeatures object reference.
+ **/
+ void PreprocessFrame(const float* audioData,
+ size_t audioLen,
+ FrameFeatures& features);
+
+ /**
+ * @brief Use the RNNoise model output gain values with pre-processing features
+ * to generate audio with noise suppressed.
+ * @param[in] modelOutput Output gain values from model.
+ * @param[in] features Calculated features from pre-processing step.
+ * @param[out] outFrame Output frame to be populated.
+ **/
+ void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame);
+
+
+ /* Public constants */
+ public:
+ static constexpr uint32_t FRAME_SIZE_SHIFT{2};
+ static constexpr uint32_t FRAME_SIZE{512};
+ static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE};
+ static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1};
+
+ static constexpr uint32_t PITCH_MIN_PERIOD{64};
+ static constexpr uint32_t PITCH_MAX_PERIOD{820};
+ static constexpr uint32_t PITCH_FRAME_SIZE{1024};
+ static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE};
+
+ static constexpr uint32_t NB_BANDS{22};
+ static constexpr uint32_t CEPS_MEM{8};
+ static constexpr uint32_t NB_DELTA_CEPS{6};
+
+ static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2};
+
+ /* Private functions */
+ private:
+
+ /**
+ * @brief Initialises the half window and DCT tables.
+ */
+ void InitTables();
+
+ /**
+ * @brief Applies a bi-quadratic filter over the audio window.
+ * @param[in] bHp Constant coefficient set b (arrHp type).
+ * @param[in] aHp Constant coefficient set a (arrHp type).
+ * @param[in,out] memHpX Coefficients populated by this function.
+ * @param[in,out] audioWindow Floating point vector with audio data.
+ **/
+ void BiQuad(
+ const arrHp& bHp,
+ const arrHp& aHp,
+ arrHp& memHpX,
+ vec1D32F& audioWindow);
+
+ /**
+ * @brief Computes features from the "filtered" audio window.
+ * @param[in] audioWindow Floating point vector with audio data.
+ * @param[out] features FrameFeatures object reference.
+ **/
+ void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
+
+ /**
+ * @brief Runs analysis on the audio buffer.
+ * @param[in] audioWindow Floating point vector with audio data.
+ * @param[out] fft Floating point FFT vector containing real and
+ * imaginary pairs of elements. NOTE: this vector
+ * does not contain the mirror image (conjugates)
+ * part of the spectrum.
+ * @param[out] energy Computed energy for each band in the Bark scale.
+ * @param[out] analysisMem Buffer sequentially, but partially,
+ * populated with new audio data.
+ **/
+ void FrameAnalysis(
+ const vec1D32F& audioWindow,
+ vec1D32F& fft,
+ vec1D32F& energy,
+ vec1D32F& analysisMem);
+
+ /**
+ * @brief Applies the window function, in-place, over the given
+ * floating point buffer.
+ * @param[in,out] x Buffer the window will be applied to.
+ **/
+ void ApplyWindow(vec1D32F& x);
+
+ /**
+ * @brief Computes the FFT for a given vector.
+ * @param[in] x Vector to compute the FFT from.
+ * @param[out] fft Floating point FFT vector containing real and
+ * imaginary pairs of elements. NOTE: this vector
+ * does not contain the mirror image (conjugates)
+ * part of the spectrum.
+ **/
+ void ForwardTransform(
+ vec1D32F& x,
+ vec1D32F& fft);
+
+ /**
+ * @brief Computes band energy for each of the 22 Bark scale bands.
+ * @param[in] fft_X FFT spectrum (as computed by ForwardTransform).
+ * @param[out] bandE Vector with 22 elements populated with energy for
+ * each band.
+ **/
+ void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
+
+ /**
+ * @brief Computes band energy correlation.
+ * @param[in] X FFT vector X.
+ * @param[in] P FFT vector P.
+ * @param[out] bandC Vector with 22 elements populated with band energy
+ * correlation for the two input FFT vectors.
+ **/
+ void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
+
+ /**
+ * @brief Performs pitch auto-correlation for a given vector for
+ * given lag.
+ * @param[in] x Input vector.
+ * @param[out] ac Auto-correlation output vector.
+ * @param[in] lag Lag value.
+ * @param[in] n Number of elements to consider for correlation
+ * computation.
+ **/
+ void AutoCorr(const vec1D32F &x,
+ vec1D32F &ac,
+ size_t lag,
+ size_t n);
+
+ /**
+ * @brief Computes pitch cross-correlation.
+ * @param[in] x Input vector 1.
+ * @param[in] y Input vector 2.
+ * @param[out] xCorr Cross-correlation output vector.
+ * @param[in] len Number of elements to consider for correlation.
+ * computation.
+ * @param[in] maxPitch Maximum pitch.
+ **/
+ void PitchXCorr(
+ const vec1D32F& x,
+ const vec1D32F& y,
+ vec1D32F& xCorr,
+ size_t len,
+ size_t maxPitch);
+
+ /**
+ * @brief Computes "Linear Predictor Coefficients".
+ * @param[in] ac Correlation vector.
+ * @param[in] p Number of elements of input vector to consider.
+ * @param[out] lpc Output coefficients vector.
+ **/
+ void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
+
+ /**
+ * @brief Custom FIR implementation.
+ * @param[in] num FIR coefficient vector.
+ * @param[in] N Number of elements.
+ * @param[out] x Vector to be be processed.
+ **/
+ void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
+
+ /**
+ * @brief Down-sample the pitch buffer.
+ * @param[in,out] pitchBuf Pitch buffer.
+ * @param[in] pitchBufSz Buffer size.
+ **/
+ void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
+
+ /**
+ * @brief Pitch search function.
+ * @param[in] xLP Shifted pitch buffer input.
+ * @param[in] y Pitch buffer input.
+ * @param[in] len Length to search for.
+ * @param[in] maxPitch Maximum pitch.
+ * @return pitch index.
+ **/
+ int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
+
+ /**
+ * @brief Finds the "best" pitch from the buffer.
+ * @param[in] xCorr Pitch correlation vector.
+ * @param[in] y Pitch buffer input.
+ * @param[in] len Length to search for.
+ * @param[in] maxPitch Maximum pitch.
+ * @return pitch array (2 elements).
+ **/
+ arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
+
+ /**
+ * @brief Remove pitch period doubling errors.
+ * @param[in,out] pitchBuf Pitch buffer vector.
+ * @param[in] maxPeriod Maximum period.
+ * @param[in] minPeriod Minimum period.
+ * @param[in] frameSize Frame size.
+ * @param[in] pitchIdx0_ Pitch index 0.
+ * @return pitch index.
+ **/
+ int RemoveDoubling(
+ vec1D32F& pitchBuf,
+ uint32_t maxPeriod,
+ uint32_t minPeriod,
+ uint32_t frameSize,
+ size_t pitchIdx0_);
+
+ /**
+ * @brief Computes pitch gain.
+ * @param[in] xy Single xy cross correlation value.
+ * @param[in] xx Single xx auto correlation value.
+ * @param[in] yy Single yy auto correlation value.
+ * @return Calculated pitch gain.
+ **/
+ float ComputePitchGain(float xy, float xx, float yy);
+
+ /**
+ * @brief Computes DCT vector from the given input.
+ * @param[in] input Input vector.
+ * @param[out] output Output vector with DCT coefficients.
+ **/
+ void DCT(vec1D32F& input, vec1D32F& output);
+
+ /**
+ * @brief Perform inverse fourier transform on complex spectral vector.
+ * @param[out] out Output vector.
+ * @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved.
+ **/
+ void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
+
+ /**
+ * @brief Perform pitch filtering.
+ * @param[in] features Object with pre-processing calculated frame features.
+ * @param[in] g Gain values.
+ **/
+ void PitchFilter(FrameFeatures& features, vec1D32F& g);
+
+ /**
+ * @brief Interpolate the band gain values.
+ * @param[out] g Gain values.
+ * @param[in] bandE Vector with 22 elements populated with energy for
+ * each band.
+ **/
+ void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
+
+ /**
+ * @brief Create de-noised frame.
+ * @param[out] outFrame Output vector for storing the created audio frame.
+ * @param[in] fftY Gain adjusted complex spectral vector.
+ */
+ void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
+
+ /* Private objects */
+ private:
+ FftInstance m_fftInstReal; /* FFT instance for real numbers */
+ FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
+ vec1D32F m_halfWindow; /* Window coefficients */
+ vec1D32F m_dctTable; /* DCT table */
+ vec1D32F m_analysisMem; /* Buffer used for frame analysis */
+ vec2D32F m_cepstralMem; /* Cepstral coefficients */
+ size_t m_memId; /* memory ID */
+ vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */
+ vec1D32F m_pitchBuf; /* Pitch buffer */
+ float m_lastGain; /* Last gain calculated */
+ int m_lastPeriod; /* Last period calculated */
+ arrHp m_memHpX; /* HpX coefficients. */
+ vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */
+
+ /* Constants */
+ const std::array <uint32_t, NB_BANDS> m_eband5ms {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
+ 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
+ };
+
+
+} /* namespace rnn */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* RNNOISE_FEATURE_PROCESSOR_HPP */