author     Richard Burton <richard.burton@arm.com>  2022-05-04 09:45:02 +0100
committer  Richard Burton <richard.burton@arm.com>  2022-05-04 09:45:02 +0100
commit     4e002791bc6781b549c6951cfe44f918289d7e82 (patch)
tree       b639243b5fa433657c207783a384bad1ed248536 /source/use_case/noise_reduction/src
parent     dd6d07b24bbf9023ebe8e8927be8aac3291d0f58 (diff)
download   ml-embedded-evaluation-kit-4e002791bc6781b549c6951cfe44f918289d7e82.tar.gz
MLECO-3173: Add AD, KWS_ASR and Noise reduction use case APIs
Signed-off-by: Richard Burton <richard.burton@arm.com>
Change-Id: I36f61ce74bf17f7b327cdae9704a22ca54144f37
Diffstat (limited to 'source/use_case/noise_reduction/src')
-rw-r--r--  source/use_case/noise_reduction/src/MainLoop.cc                 |   4
-rw-r--r--  source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc (renamed from source/use_case/noise_reduction/src/RNNoiseProcess.cc) |  60
-rw-r--r--  source/use_case/noise_reduction/src/RNNoiseProcessing.cc        | 100
-rw-r--r--  source/use_case/noise_reduction/src/UseCaseHandler.cc           | 129
4 files changed, 175 insertions, 118 deletions
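
The headline change: pre- and post-processing for this use case now sit behind dedicated RNNoisePreProcess/RNNoisePostProcess API classes instead of free helpers inside UseCaseHandler.cc. A minimal sketch of how the refactored pieces are wired together, distilled from the UseCaseHandler.cc changes further down (setup of the model, tensors and sliding window is elided):

    /* Sketch only: model, profiler, inputTensor, outputTensor, audioDataSlider
     * and audioFrameLen are assumed set up as in NoiseReductionHandler below. */
    auto featureProcessor = std::make_shared<rnn::RNNoiseFeatureProcessor>();
    auto frameFeatures    = std::make_shared<rnn::FrameFeatures>();

    RNNoisePreProcess preProcess(inputTensor, featureProcessor, frameFeatures);
    std::vector<int16_t> denoisedAudioFrame(audioFrameLen);
    RNNoisePostProcess postProcess(outputTensor, denoisedAudioFrame,
                                   featureProcessor, frameFeatures);

    while (audioDataSlider.HasNext()) {
        const int16_t* window = audioDataSlider.Next();
        if (!preProcess.DoPreProcess(window, audioFrameLen)) { return false; }
        if (!RunInference(model, profiler)) { return false; } /* GRU state handling elided. */
        if (!postProcess.DoPostProcess()) { return false; }
        /* denoisedAudioFrame now holds one int16 frame of denoised audio. */
    }
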
diff --git a/source/use_case/noise_reduction/src/MainLoop.cc b/source/use_case/noise_reduction/src/MainLoop.cc
index 5fd7823..fd72127 100644
--- a/source/use_case/noise_reduction/src/MainLoop.cc
+++ b/source/use_case/noise_reduction/src/MainLoop.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,12 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include "hal.h" /* Brings in platform definitions. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
#include "RNNoiseModel.hpp" /* Model class for running inference. */
#include "InputFiles.hpp" /* For input audio clips. */
-#include "RNNoiseProcess.hpp" /* Pre-processing class */
#include "log_macros.h"
enum opcodes
diff --git a/source/use_case/noise_reduction/src/RNNoiseProcess.cc b/source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc
index 4c568fa..036894c 100644
--- a/source/use_case/noise_reduction/src/RNNoiseProcess.cc
+++ b/source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include "RNNoiseProcess.hpp"
+#include "RNNoiseFeatureProcessor.hpp"
#include "log_macros.h"
#include <algorithm>
@@ -33,7 +33,7 @@ do { \
} \
} while(0)
-RNNoiseProcess::RNNoiseProcess() :
+RNNoiseFeatureProcessor::RNNoiseFeatureProcessor() :
m_halfWindow(FRAME_SIZE, 0),
m_dctTable(NB_BANDS * NB_BANDS),
m_analysisMem(FRAME_SIZE, 0),
@@ -54,9 +54,9 @@ RNNoiseProcess::RNNoiseProcess() :
this->InitTables();
}
-void RNNoiseProcess::PreprocessFrame(const float* audioData,
- const size_t audioLen,
- FrameFeatures& features)
+void RNNoiseFeatureProcessor::PreprocessFrame(const float* audioData,
+ const size_t audioLen,
+ FrameFeatures& features)
{
/* Note audioWindow is modified in place */
const arrHp aHp {-1.99599, 0.99600 };
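
The aHp coefficients above ({-1.99599, 0.99600}, poles just inside the unit circle near z = 1) make BiQuad() a gentle DC-blocking high-pass. The filter body is not shown in this diff; a sketch of a transposed direct-form II biquad with unity b0, which is what the two-element arrHp coefficient and state arrays suggest (form assumed from upstream RNNoise, not from this hunk):

    void BiQuadSketch(const arrHp& bHp, const arrHp& aHp, arrHp& mem, vec1D32F& x)
    {
        for (float& sample : x) {
            const float in  = sample;
            const float out = in + mem[0];
            mem[0] = mem[1] + (bHp[0] * in - aHp[0] * out);
            mem[1] = (bHp[1] * in - aHp[1] * out);
            sample = out;   /* filtered in place, as the comment above notes. */
        }
    }
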
@@ -68,7 +68,7 @@ void RNNoiseProcess::PreprocessFrame(const float* audioData,
this->ComputeFrameFeatures(audioWindow, features);
}
-void RNNoiseProcess::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame)
+void RNNoiseFeatureProcessor::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame)
{
std::vector<float> outputBands = modelOutput;
std::vector<float> gain(FREQ_SIZE, 0);
@@ -92,7 +92,7 @@ void RNNoiseProcess::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& feat
FrameSynthesis(outFrame, features.m_fftX);
}
-void RNNoiseProcess::InitTables()
+void RNNoiseFeatureProcessor::InitTables()
{
constexpr float pi = M_PI;
constexpr float halfPi = M_PI / 2;
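
InitTables() populates m_halfWindow and m_dctTable once at construction. The loop bodies are elided from this diff; in upstream RNNoise they are the power-complementary Vorbis window and an NB_BANDS x NB_BANDS DCT-II basis, roughly (formulas assumed from upstream, using the pi/halfPi constants above):

    for (uint32_t i = 0; i < FRAME_SIZE; i++) {
        const float t = std::sin(halfPi * (i + 0.5f) / FRAME_SIZE);
        m_halfWindow[i] = std::sin(halfPi * t * t);      /* Vorbis window. */
    }
    for (uint32_t i = 0; i < NB_BANDS; i++) {
        for (uint32_t j = 0; j < NB_BANDS; j++) {
            m_dctTable[i * NB_BANDS + j] = std::cos((i + 0.5f) * j * pi / NB_BANDS);
        }
        m_dctTable[i * NB_BANDS] *= std::sqrt(0.5f);     /* scale the j == 0 column. */
    }
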
@@ -111,7 +111,7 @@ void RNNoiseProcess::InitTables()
}
}
-void RNNoiseProcess::BiQuad(
+void RNNoiseFeatureProcessor::BiQuad(
const arrHp& bHp,
const arrHp& aHp,
arrHp& memHpX,
@@ -126,8 +126,8 @@ void RNNoiseProcess::BiQuad(
}
}
-void RNNoiseProcess::ComputeFrameFeatures(vec1D32F& audioWindow,
- FrameFeatures& features)
+void RNNoiseFeatureProcessor::ComputeFrameFeatures(vec1D32F& audioWindow,
+ FrameFeatures& features)
{
this->FrameAnalysis(audioWindow,
features.m_fftX,
@@ -264,7 +264,7 @@ void RNNoiseProcess::ComputeFrameFeatures(vec1D32F& audioWindow,
features.m_featuresVec[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = specVariability / CEPS_MEM - 2.1;
}
-void RNNoiseProcess::FrameAnalysis(
+void RNNoiseFeatureProcessor::FrameAnalysis(
const vec1D32F& audioWindow,
vec1D32F& fft,
vec1D32F& energy,
@@ -289,7 +289,7 @@ void RNNoiseProcess::FrameAnalysis(
ComputeBandEnergy(fft, energy);
}
-void RNNoiseProcess::ApplyWindow(vec1D32F& x)
+void RNNoiseFeatureProcessor::ApplyWindow(vec1D32F& x)
{
if (WINDOW_SIZE != x.size()) {
printf_err("Invalid size for vector to be windowed\n");
@@ -305,7 +305,7 @@ void RNNoiseProcess::ApplyWindow(vec1D32F& x)
}
}
-void RNNoiseProcess::ForwardTransform(
+void RNNoiseFeatureProcessor::ForwardTransform(
vec1D32F& x,
vec1D32F& fft)
{
@@ -327,7 +327,7 @@ void RNNoiseProcess::ForwardTransform(
* first half of the FFT's. The conjugates are not present. */
}
-void RNNoiseProcess::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE)
+void RNNoiseFeatureProcessor::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE)
{
bandE = vec1D32F(NB_BANDS, 0);
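
ComputeBandEnergy() folds per-bin spectral energy into NB_BANDS Bark-like bands delimited by m_eband5ms; only the final doubling of the last band survives in the hunk below. The elided accumulation, assuming the upstream RNNoise scheme and an interleaved (re, im) layout for fftX, splits each bin's energy triangularly between adjacent bands:

    for (uint32_t i = 0; i < NB_BANDS - 1; i++) {
        const int bandSize = (m_eband5ms[i + 1] - m_eband5ms[i]) << FRAME_SIZE_SHIFT;
        for (int j = 0; j < bandSize; j++) {
            const int   bin  = (m_eband5ms[i] << FRAME_SIZE_SHIFT) + j;
            const float re   = fftX[2 * bin];
            const float im   = fftX[2 * bin + 1];
            const float frac = static_cast<float>(j) / bandSize;
            bandE[i]     += (1 - frac) * (re * re + im * im);
            bandE[i + 1] += frac * (re * re + im * im);
        }
    }
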
@@ -351,7 +351,7 @@ void RNNoiseProcess::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE)
bandE[NB_BANDS - 1] *= 2;
}
-void RNNoiseProcess::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC)
+void RNNoiseFeatureProcessor::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC)
{
bandC = vec1D32F(NB_BANDS, 0);
VERIFY(this->m_eband5ms.size() >= NB_BANDS);
@@ -374,7 +374,7 @@ void RNNoiseProcess::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D
bandC[NB_BANDS - 1] *= 2;
}
-void RNNoiseProcess::DCT(vec1D32F& input, vec1D32F& output)
+void RNNoiseFeatureProcessor::DCT(vec1D32F& input, vec1D32F& output)
{
VERIFY(this->m_dctTable.size() >= NB_BANDS * NB_BANDS);
for (uint32_t i = 0; i < NB_BANDS; ++i) {
@@ -387,7 +387,7 @@ void RNNoiseProcess::DCT(vec1D32F& input, vec1D32F& output)
}
}
-void RNNoiseProcess::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) {
+void RNNoiseFeatureProcessor::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) {
for (size_t i = 1; i < (pitchBufSz >> 1); ++i) {
pitchBuf[i] = 0.5 * (
0.5 * (this->m_pitchBuf[2 * i - 1] + this->m_pitchBuf[2 * i + 1])
@@ -431,7 +431,7 @@ void RNNoiseProcess::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) {
this->Fir5(lpc2, pitchBufSz >> 1, pitchBuf);
}
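
The decimation at the top of PitchDownsample() is easier to read expanded: it is a 2:1 downsample through a [0.25, 0.5, 0.25] smoothing kernel, after which Fir5() whitens the buffer with the LPC coefficients computed in the elided middle of the function:

    /* Equivalent form of pitchBuf[i] = 0.5 * (0.5 * (b[2i-1] + b[2i+1]) + b[2i]): */
    pitchBuf[i] = 0.25f * m_pitchBuf[2 * i - 1]
                + 0.50f * m_pitchBuf[2 * i]
                + 0.25f * m_pitchBuf[2 * i + 1];
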
-int RNNoiseProcess::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch) {
+int RNNoiseFeatureProcessor::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch) {
uint32_t lag = len + maxPitch;
vec1D32F xLp4(len >> 2, 0);
vec1D32F yLp4(lag >> 2, 0);
@@ -488,7 +488,7 @@ int RNNoiseProcess::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32
return 2*bestPitch[0] - offset;
}
-arrHp RNNoiseProcess::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch)
+arrHp RNNoiseFeatureProcessor::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch)
{
float Syy = 1;
arrHp bestNum {-1, -1};
@@ -527,7 +527,7 @@ arrHp RNNoiseProcess::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len,
return bestPitch;
}
-int RNNoiseProcess::RemoveDoubling(
+int RNNoiseFeatureProcessor::RemoveDoubling(
vec1D32F& pitchBuf,
uint32_t maxPeriod,
uint32_t minPeriod,
@@ -679,12 +679,12 @@ int RNNoiseProcess::RemoveDoubling(
return this->m_lastPeriod;
}
-float RNNoiseProcess::ComputePitchGain(float xy, float xx, float yy)
+float RNNoiseFeatureProcessor::ComputePitchGain(float xy, float xx, float yy)
{
return xy / math::MathUtils::SqrtF32(1+xx*yy);
}
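
ComputePitchGain() is the normalised cross-correlation g = xy / sqrt(1 + xx*yy); the 1 added under the square root keeps the division well-defined for silent frames where both energies vanish. As a worked example, xy = 0.8, xx = 1.0, yy = 0.9 gives g = 0.8 / sqrt(1.9) ≈ 0.58.
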
-void RNNoiseProcess::AutoCorr(
+void RNNoiseFeatureProcessor::AutoCorr(
const vec1D32F& x,
vec1D32F& ac,
size_t lag,
@@ -711,7 +711,7 @@ void RNNoiseProcess::AutoCorr(
}
-void RNNoiseProcess::PitchXCorr(
+void RNNoiseFeatureProcessor::PitchXCorr(
const vec1D32F& x,
const vec1D32F& y,
vec1D32F& xCorr,
@@ -728,7 +728,7 @@ void RNNoiseProcess::PitchXCorr(
}
/* Linear predictor coefficients */
-void RNNoiseProcess::LPC(
+void RNNoiseFeatureProcessor::LPC(
const vec1D32F& correlation,
int32_t p,
vec1D32F& lpc)
@@ -766,7 +766,7 @@ void RNNoiseProcess::LPC(
}
}
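
LPC() derives p predictor coefficients from the autocorrelation sequence. The body elided by this hunk follows the Levinson-Durbin recursion (RNNoise borrows its pitch code from CELT, which uses exactly this form); a sketch under that assumption:

    /* correlation has p + 1 entries, lpc receives p coefficients. */
    float error = correlation[0];
    std::fill(lpc.begin(), lpc.end(), 0.0f);
    for (int32_t i = 0; (i < p) && (error > 0.0f); ++i) {
        float rr = correlation[i + 1];
        for (int32_t j = 0; j < i; ++j) {
            rr += lpc[j] * correlation[i - j];
        }
        const float r = -rr / error;            /* reflection coefficient. */
        lpc[i] = r;
        for (int32_t j = 0; j < (i + 1) >> 1; ++j) {
            const float tmp1 = lpc[j];
            const float tmp2 = lpc[i - 1 - j];
            lpc[j]         = tmp1 + r * tmp2;
            lpc[i - 1 - j] = tmp2 + r * tmp1;
        }
        error -= r * r * error;                 /* error *= (1 - r^2). */
    }
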
-void RNNoiseProcess::Fir5(
+void RNNoiseFeatureProcessor::Fir5(
const vec1D32F &num,
uint32_t N,
vec1D32F &x)
@@ -794,7 +794,7 @@ void RNNoiseProcess::Fir5(
}
}
-void RNNoiseProcess::PitchFilter(FrameFeatures &features, vec1D32F &gain) {
+void RNNoiseFeatureProcessor::PitchFilter(FrameFeatures &features, vec1D32F &gain) {
std::vector<float> r(NB_BANDS, 0);
std::vector<float> rf(FREQ_SIZE, 0);
std::vector<float> newE(NB_BANDS);
@@ -835,7 +835,7 @@ void RNNoiseProcess::PitchFilter(FrameFeatures &features, vec1D32F &gain) {
}
}
-void RNNoiseProcess::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) {
+void RNNoiseFeatureProcessor::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) {
std::vector<float> x(WINDOW_SIZE, 0);
InverseTransform(x, fftY);
ApplyWindow(x);
@@ -845,7 +845,7 @@ void RNNoiseProcess::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) {
memcpy((m_synthesisMem.data()), &x[FRAME_SIZE], FRAME_SIZE*sizeof(float));
}
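
FrameSynthesis() is a textbook overlap-add: the windowed inverse transform spans WINDOW_SIZE samples, the first FRAME_SIZE of which are mixed with the tail saved from the previous call, while the memcpy above stashes the second half in m_synthesisMem for the next frame. The mixing step elided between ApplyWindow() and the memcpy is, in effect:

    for (size_t i = 0; i < FRAME_SIZE; i++) {
        outFrame[i] = x[i] + m_synthesisMem[i];   /* overlap with previous tail. */
    }
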
-void RNNoiseProcess::InterpBandGain(vec1D32F& g, vec1D32F& bandE) {
+void RNNoiseFeatureProcessor::InterpBandGain(vec1D32F& g, vec1D32F& bandE) {
for (size_t i = 0; i < NB_BANDS - 1; i++) {
int bandSize = (m_eband5ms[i + 1] - m_eband5ms[i]) << FRAME_SIZE_SHIFT;
for (int j = 0; j < bandSize; j++) {
@@ -855,7 +855,7 @@ void RNNoiseProcess::InterpBandGain(vec1D32F& g, vec1D32F& bandE) {
}
}
-void RNNoiseProcess::InverseTransform(vec1D32F& out, vec1D32F& fftXIn) {
+void RNNoiseFeatureProcessor::InverseTransform(vec1D32F& out, vec1D32F& fftXIn) {
std::vector<float> x(WINDOW_SIZE * 2); /* This is complex. */
vec1D32F newFFT; /* This is complex. */
diff --git a/source/use_case/noise_reduction/src/RNNoiseProcessing.cc b/source/use_case/noise_reduction/src/RNNoiseProcessing.cc
new file mode 100644
index 0000000..f6a3ec4
--- /dev/null
+++ b/source/use_case/noise_reduction/src/RNNoiseProcessing.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "RNNoiseProcessing.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ RNNoisePreProcess::RNNoisePreProcess(TfLiteTensor* inputTensor,
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor, std::shared_ptr<rnn::FrameFeatures> frameFeatures)
+ : m_inputTensor{inputTensor},
+ m_featureProcessor{featureProcessor},
+ m_frameFeatures{frameFeatures}
+ {}
+
+ bool RNNoisePreProcess::DoPreProcess(const void* data, size_t inputSize)
+ {
+ if (data == nullptr) {
+ printf_err("Data pointer is null");
+ return false;
+ }
+
+ auto input = static_cast<const int16_t*>(data);
+ this->m_audioFrame = rnn::vec1D32F(input, input + inputSize);
+ m_featureProcessor->PreprocessFrame(this->m_audioFrame.data(), inputSize, *this->m_frameFeatures);
+
+ QuantizeAndPopulateInput(this->m_frameFeatures->m_featuresVec,
+ this->m_inputTensor->params.scale, this->m_inputTensor->params.zero_point,
+ this->m_inputTensor);
+
+ debug("Input tensor populated \n");
+
+ return true;
+ }
+
+ void RNNoisePreProcess::QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
+ const float quantScale, const int quantOffset,
+ TfLiteTensor* inputTensor)
+ {
+ const float minVal = std::numeric_limits<int8_t>::min();
+ const float maxVal = std::numeric_limits<int8_t>::max();
+
+ auto* inputTensorData = tflite::GetTensorData<int8_t>(inputTensor);
+
+ for (size_t i=0; i < inputFeatures.size(); ++i) {
+ float quantValue = ((inputFeatures[i] / quantScale) + quantOffset);
+ inputTensorData[i] = static_cast<int8_t>(std::min<float>(std::max<float>(quantValue, minVal), maxVal));
+ }
+ }
+
+ RNNoisePostProcess::RNNoisePostProcess(TfLiteTensor* outputTensor,
+ std::vector<int16_t>& denoisedAudioFrame,
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
+ std::shared_ptr<rnn::FrameFeatures> frameFeatures)
+ : m_outputTensor{outputTensor},
+ m_denoisedAudioFrame{denoisedAudioFrame},
+ m_featureProcessor{featureProcessor},
+ m_frameFeatures{frameFeatures}
+ {
+ this->m_denoisedAudioFrameFloat.reserve(denoisedAudioFrame.size());
+ this->m_modelOutputFloat.resize(outputTensor->bytes);
+ }
+
+ bool RNNoisePostProcess::DoPostProcess()
+ {
+ const auto* outputData = tflite::GetTensorData<int8_t>(this->m_outputTensor);
+ auto outputQuantParams = GetTensorQuantParams(this->m_outputTensor);
+
+ for (size_t i = 0; i < this->m_outputTensor->bytes; ++i) {
+ this->m_modelOutputFloat[i] = (static_cast<float>(outputData[i]) - outputQuantParams.offset)
+ * outputQuantParams.scale;
+ }
+
+ this->m_featureProcessor->PostProcessFrame(this->m_modelOutputFloat,
+ *this->m_frameFeatures, this->m_denoisedAudioFrameFloat);
+
+ for (size_t i = 0; i < this->m_denoisedAudioFrame.size(); ++i) {
+ this->m_denoisedAudioFrame[i] = static_cast<int16_t>(
+ std::roundf(this->m_denoisedAudioFrameFloat[i]));
+ }
+
+ return true;
+ }
+
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file
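
The new file keeps the int8 quantisation maths in one place: DoPreProcess() maps each float feature to clamp(f / scale + zero_point, -128, 127) and DoPostProcess() inverts it with (q - offset) * scale before the frame is rebuilt. A self-contained round-trip sketch with hypothetical quantisation parameters (scale = 0.05 and zeroPoint = -3 are illustrative, not read from the model):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const float scale     = 0.05f;  /* hypothetical */
        const int   zeroPoint = -3;     /* hypothetical */
        const float feature   = 1.234f;

        /* Quantise, mirroring RNNoisePreProcess::QuantizeAndPopulateInput. */
        const float q  = feature / scale + zeroPoint;                    /* 21.68 */
        const auto  q8 = static_cast<int8_t>(
            std::min(std::max(q, -128.0f), 127.0f));                     /* 21    */

        /* De-quantise, mirroring RNNoisePostProcess::DoPostProcess. */
        const float back = (static_cast<float>(q8) - zeroPoint) * scale; /* 1.20  */
        std::printf("q8 = %d, back = %.2f\n", q8, back);
        return 0;
    }
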
diff --git a/source/use_case/noise_reduction/src/UseCaseHandler.cc b/source/use_case/noise_reduction/src/UseCaseHandler.cc
index acb8ba7..53bb43e 100644
--- a/source/use_case/noise_reduction/src/UseCaseHandler.cc
+++ b/source/use_case/noise_reduction/src/UseCaseHandler.cc
@@ -21,12 +21,10 @@
#include "ImageUtils.hpp"
#include "InputFiles.hpp"
#include "RNNoiseModel.hpp"
-#include "RNNoiseProcess.hpp"
+#include "RNNoiseFeatureProcessor.hpp"
+#include "RNNoiseProcessing.hpp"
#include "log_macros.h"
-#include <cmath>
-#include <algorithm>
-
namespace arm {
namespace app {
@@ -36,17 +34,6 @@ namespace app {
**/
static void IncrementAppCtxClipIdx(ApplicationContext& ctx);
- /**
- * @brief Quantize the given features and populate the input Tensor.
- * @param[in] inputFeatures Vector of floating point features to quantize.
- * @param[in] quantScale Quantization scale for the inputTensor.
- * @param[in] quantOffset Quantization offset for the inputTensor.
- * @param[in,out] inputTensor TFLite micro tensor to populate.
- **/
- static void QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
- float quantScale, int quantOffset,
- TfLiteTensor* inputTensor);
-
/* Noise reduction inference handler. */
bool NoiseReductionHandler(ApplicationContext& ctx, bool runAll)
{
@@ -57,7 +44,7 @@ namespace app {
size_t memDumpMaxLen = 0;
uint8_t* memDumpBaseAddr = nullptr;
size_t undefMemDumpBytesWritten = 0;
- size_t *pMemDumpBytesWritten = &undefMemDumpBytesWritten;
+ size_t* pMemDumpBytesWritten = &undefMemDumpBytesWritten;
if (ctx.Has("MEM_DUMP_LEN") && ctx.Has("MEM_DUMP_BASE_ADDR") && ctx.Has("MEM_DUMP_BYTE_WRITTEN")) {
memDumpMaxLen = ctx.Get<size_t>("MEM_DUMP_LEN");
memDumpBaseAddr = ctx.Get<uint8_t*>("MEM_DUMP_BASE_ADDR");
@@ -74,8 +61,8 @@ namespace app {
}
/* Populate Pre-Processing related parameters. */
- auto audioParamsWinLen = ctx.Get<uint32_t>("frameLength");
- auto audioParamsWinStride = ctx.Get<uint32_t>("frameStride");
+ auto audioFrameLen = ctx.Get<uint32_t>("frameLength");
+ auto audioFrameStride = ctx.Get<uint32_t>("frameStride");
auto nrNumInputFeatures = ctx.Get<uint32_t>("numInputFeatures");
TfLiteTensor* inputTensor = model.GetInputTensor(0);
@@ -103,7 +90,7 @@ namespace app {
if (ctx.Has("featureFileNames")) {
audioFileAccessorFunc = ctx.Get<std::function<const char*(const uint32_t)>>("featureFileNames");
}
- do{
+ do {
hal_lcd_clear(COLOR_BLACK);
auto startDumpAddress = memDumpBaseAddr + memDumpBytesWritten;
@@ -112,32 +99,38 @@ namespace app {
/* Creating a sliding window through the audio. */
auto audioDataSlider = audio::SlidingWindow<const int16_t>(
audioAccessorFunc(currentIndex),
- audioSizeAccessorFunc(currentIndex), audioParamsWinLen,
- audioParamsWinStride);
+ audioSizeAccessorFunc(currentIndex), audioFrameLen,
+ audioFrameStride);
info("Running inference on input feature map %" PRIu32 " => %s\n", currentIndex,
audioFileAccessorFunc(currentIndex));
memDumpBytesWritten += DumpDenoisedAudioHeader(audioFileAccessorFunc(currentIndex),
- (audioDataSlider.TotalStrides() + 1) * audioParamsWinLen,
+ (audioDataSlider.TotalStrides() + 1) * audioFrameLen,
memDumpBaseAddr + memDumpBytesWritten,
memDumpMaxLen - memDumpBytesWritten);
- rnn::RNNoiseProcess featureProcessor = rnn::RNNoiseProcess();
- rnn::vec1D32F audioFrame(audioParamsWinLen);
- rnn::vec1D32F inputFeatures(nrNumInputFeatures);
- rnn::vec1D32F denoisedAudioFrameFloat(audioParamsWinLen);
- std::vector<int16_t> denoisedAudioFrame(audioParamsWinLen);
+ /* Set up pre and post-processing. */
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor =
+ std::make_shared<rnn::RNNoiseFeatureProcessor>();
+ std::shared_ptr<rnn::FrameFeatures> frameFeatures =
+ std::make_shared<rnn::FrameFeatures>();
+
+ RNNoisePreProcess preProcess = RNNoisePreProcess(inputTensor, featureProcessor, frameFeatures);
+
+ std::vector<int16_t> denoisedAudioFrame(audioFrameLen);
+ RNNoisePostProcess postProcess = RNNoisePostProcess(outputTensor, denoisedAudioFrame,
+ featureProcessor, frameFeatures);
- std::vector<float> modelOutputFloat(outputTensor->bytes);
- rnn::FrameFeatures frameFeatures;
bool resetGRU = true;
while (audioDataSlider.HasNext()) {
const int16_t* inferenceWindow = audioDataSlider.Next();
- audioFrame = rnn::vec1D32F(inferenceWindow, inferenceWindow+audioParamsWinLen);
- featureProcessor.PreprocessFrame(audioFrame.data(), audioParamsWinLen, frameFeatures);
+ if (!preProcess.DoPreProcess(inferenceWindow, audioFrameLen)) {
+ printf_err("Pre-processing failed.");
+ return false;
+ }
/* Reset or copy over GRU states first to avoid TFLu memory overlap issues. */
if (resetGRU){
@@ -148,53 +141,35 @@ namespace app {
model.CopyGruStates();
}
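
This state juggling matters because the RNNoise model is recurrent: its GRU state must be zeroed on the first frame of each clip and carried across frames afterwards, and CopyGruStates() copies the output state tensors back to the inputs to sidestep the TFLu memory-overlap issue the comment mentions. In outline (ResetGruState() is the reset counterpart from RNNoiseModel; only CopyGruStates() is visible in this hunk):

    bool resetGRU = true;                 /* first frame of the clip.       */
    while (audioDataSlider.HasNext()) {
        /* ... pre-process ... */
        if (resetGRU) {
            model.ResetGruState();        /* zero the recurrent state.      */
        } else {
            model.CopyGruStates();        /* carry state from the last run. */
        }
        /* ... inference and post-process ... */
        resetGRU = false;
    }
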
- QuantizeAndPopulateInput(frameFeatures.m_featuresVec,
- inputTensor->params.scale, inputTensor->params.zero_point,
- inputTensor);
-
/* Strings for presentation/logging. */
std::string str_inf{"Running inference... "};
/* Display message on the LCD - inference running. */
- hal_lcd_display_text(
- str_inf.c_str(), str_inf.size(),
- dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
+ hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
+ dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
info("Inference %zu/%zu\n", audioDataSlider.Index() + 1, audioDataSlider.TotalStrides() + 1);
/* Run inference over this feature sliding window. */
- profiler.StartProfiling("Inference");
- bool success = model.RunInference();
- profiler.StopProfiling();
- resetGRU = false;
-
- if (!success) {
+ if (!RunInference(model, profiler)) {
+ printf_err("Inference failed.");
return false;
}
+ resetGRU = false;
- /* De-quantize main model output ready for post-processing. */
- const auto* outputData = tflite::GetTensorData<int8_t>(outputTensor);
- auto outputQuantParams = arm::app::GetTensorQuantParams(outputTensor);
-
- for (size_t i = 0; i < outputTensor->bytes; ++i) {
- modelOutputFloat[i] = (static_cast<float>(outputData[i]) - outputQuantParams.offset)
- * outputQuantParams.scale;
- }
-
- /* Round and cast the post-processed results for dumping to wav. */
- featureProcessor.PostProcessFrame(modelOutputFloat, frameFeatures, denoisedAudioFrameFloat);
- for (size_t i = 0; i < audioParamsWinLen; ++i) {
- denoisedAudioFrame[i] = static_cast<int16_t>(std::roundf(denoisedAudioFrameFloat[i]));
+ /* Carry out post-processing. */
+ if (!postProcess.DoPostProcess()) {
+ printf_err("Post-processing failed.");
+ return false;
}
/* Erase. */
str_inf = std::string(str_inf.size(), ' ');
- hal_lcd_display_text(
- str_inf.c_str(), str_inf.size(),
- dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
+ hal_lcd_display_text(str_inf.c_str(), str_inf.size(),
+ dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
if (memDumpMaxLen > 0) {
- /* Dump output tensors to memory. */
+ /* Dump final post processed output to memory. */
memDumpBytesWritten += DumpOutputDenoisedAudioFrame(
denoisedAudioFrame,
memDumpBaseAddr + memDumpBytesWritten,
@@ -209,6 +184,7 @@ namespace app {
valMemDumpBytesWritten, startDumpAddress);
}
+ /* Finish by dumping the footer. */
DumpDenoisedAudioFooter(memDumpBaseAddr + memDumpBytesWritten, memDumpMaxLen - memDumpBytesWritten);
info("All inferences for audio clip complete.\n");
@@ -216,15 +192,13 @@ namespace app {
IncrementAppCtxClipIdx(ctx);
std::string clearString{' '};
- hal_lcd_display_text(
- clearString.c_str(), clearString.size(),
+ hal_lcd_display_text(clearString.c_str(), clearString.size(),
dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
std::string completeMsg{"Inference complete!"};
/* Display message on the LCD - inference complete. */
- hal_lcd_display_text(
- completeMsg.c_str(), completeMsg.size(),
+ hal_lcd_display_text(completeMsg.c_str(), completeMsg.size(),
dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
} while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);
@@ -233,7 +207,7 @@ namespace app {
}
size_t DumpDenoisedAudioHeader(const char* filename, size_t dumpSize,
- uint8_t *memAddress, size_t memSize){
+ uint8_t* memAddress, size_t memSize){
if (memAddress == nullptr){
return 0;
@@ -284,7 +258,7 @@ namespace app {
return numBytesWritten;
}
- size_t DumpDenoisedAudioFooter(uint8_t *memAddress, size_t memSize){
+ size_t DumpDenoisedAudioFooter(uint8_t* memAddress, size_t memSize){
if ((memAddress == nullptr) || (memSize < 4)) {
return 0;
}
@@ -294,8 +268,8 @@ namespace app {
return sizeof(int32_t);
}
- size_t DumpOutputDenoisedAudioFrame(const std::vector<int16_t> &audioFrame,
- uint8_t *memAddress, size_t memSize)
+ size_t DumpOutputDenoisedAudioFrame(const std::vector<int16_t>& audioFrame,
+ uint8_t* memAddress, size_t memSize)
{
if (memAddress == nullptr) {
return 0;
@@ -324,7 +298,7 @@ namespace app {
const TfLiteTensor* tensor = model.GetOutputTensor(i);
const auto* tData = tflite::GetTensorData<uint8_t>(tensor);
#if VERIFY_TEST_OUTPUT
- arm::app::DumpTensor(tensor);
+ DumpTensor(tensor);
#endif /* VERIFY_TEST_OUTPUT */
/* Ensure that we don't overflow the allowed limit. */
if (numBytesWritten + tensor->bytes <= memSize) {
@@ -360,20 +334,5 @@ namespace app {
ctx.Set<uint32_t>("clipIndex", curClipIdx);
}
- void QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
- const float quantScale, const int quantOffset, TfLiteTensor* inputTensor)
- {
- const float minVal = std::numeric_limits<int8_t>::min();
- const float maxVal = std::numeric_limits<int8_t>::max();
-
- auto* inputTensorData = tflite::GetTensorData<int8_t>(inputTensor);
-
- for (size_t i=0; i < inputFeatures.size(); ++i) {
- float quantValue = ((inputFeatures[i] / quantScale) + quantOffset);
- inputTensorData[i] = static_cast<int8_t>(std::min<float>(std::max<float>(quantValue, minVal), maxVal));
- }
- }
-
-
} /* namespace app */
} /* namespace arm */