author     Richard Burton <richard.burton@arm.com>   2022-05-04 09:45:02 +0100
committer  Richard Burton <richard.burton@arm.com>   2022-05-04 09:45:02 +0100
commit     4e002791bc6781b549c6951cfe44f918289d7e82
tree       b639243b5fa433657c207783a384bad1ed248536 /source/use_case/noise_reduction/src
parent     dd6d07b24bbf9023ebe8e8927be8aac3291d0f58
download   ml-embedded-evaluation-kit-4e002791bc6781b549c6951cfe44f918289d7e82.tar.gz
MLECO-3173: Add AD, KWS_ASR and Noise reduction use case APIs
Signed-off-by: Richard Burton <richard.burton@arm.com>
Change-Id: I36f61ce74bf17f7b327cdae9704a22ca54144f37
Diffstat (limited to 'source/use_case/noise_reduction/src')
-rw-r--r--  source/use_case/noise_reduction/src/MainLoop.cc                                                                        |   4
-rw-r--r--  source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc (renamed from source/use_case/noise_reduction/src/RNNoiseProcess.cc) |  60
-rw-r--r--  source/use_case/noise_reduction/src/RNNoiseProcessing.cc                                                               | 100
-rw-r--r--  source/use_case/noise_reduction/src/UseCaseHandler.cc                                                                  | 129
4 files changed, 175 insertions, 118 deletions
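The main change for this directory is that the quantisation, de-quantisation and frame post-processing logic previously embedded in UseCaseHandler.cc now sits behind two small classes, RNNoisePreProcess and RNNoisePostProcess (added in RNNoiseProcessing.cc), which share an RNNoiseFeatureProcessor and a FrameFeatures object. The sketch below is not part of the commit: DenoiseFrame is a hypothetical helper showing how the handler drives this API for a single audio frame, and the GRU state reset/copy and memory-dump handling from the real loop are omitted.

    /*
     * Minimal sketch (not part of this commit) of the per-frame flow the new
     * use case API enables. DenoiseFrame is a hypothetical helper; the real
     * handler additionally resets/copies GRU state and dumps output to memory.
     */
    #include "RNNoiseFeatureProcessor.hpp"
    #include "RNNoiseProcessing.hpp"
    #include "RNNoiseModel.hpp"

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <vector>

    bool DenoiseFrame(arm::app::RNNoiseModel& model,
                      const int16_t* frame, size_t frameLen,
                      std::vector<int16_t>& denoisedAudioFrame /* pre-sized to frameLen */)
    {
        using namespace arm::app;

        /* Shared state: the feature extractor and the features for the current frame. */
        auto featureProcessor = std::make_shared<rnn::RNNoiseFeatureProcessor>();
        auto frameFeatures    = std::make_shared<rnn::FrameFeatures>();

        RNNoisePreProcess preProcess(model.GetInputTensor(0), featureProcessor, frameFeatures);
        RNNoisePostProcess postProcess(model.GetOutputTensor(0), denoisedAudioFrame,
                                       featureProcessor, frameFeatures);

        /* Extract RNNoise features and quantise them into the input tensor. */
        if (!preProcess.DoPreProcess(frame, frameLen)) {
            return false;
        }

        /* Run the RNNoise model on the populated input tensor. */
        if (!model.RunInference()) {
            return false;
        }

        /* De-quantise the band gains, apply them and synthesise the denoised int16 frame. */
        return postProcess.DoPostProcess();
    }
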
diff --git a/source/use_case/noise_reduction/src/MainLoop.cc b/source/use_case/noise_reduction/src/MainLoop.cc index 5fd7823..fd72127 100644 --- a/source/use_case/noise_reduction/src/MainLoop.cc +++ b/source/use_case/noise_reduction/src/MainLoop.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. All rights reserved. + * Copyright (c) 2021-2022 Arm Limited. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,12 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "hal.h" /* Brings in platform definitions. */ #include "UseCaseHandler.hpp" /* Handlers for different user options. */ #include "UseCaseCommonUtils.hpp" /* Utils functions. */ #include "RNNoiseModel.hpp" /* Model class for running inference. */ #include "InputFiles.hpp" /* For input audio clips. */ -#include "RNNoiseProcess.hpp" /* Pre-processing class */ #include "log_macros.h" enum opcodes diff --git a/source/use_case/noise_reduction/src/RNNoiseProcess.cc b/source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc index 4c568fa..036894c 100644 --- a/source/use_case/noise_reduction/src/RNNoiseProcess.cc +++ b/source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. All rights reserved. + * Copyright (c) 2021-2022 Arm Limited. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "RNNoiseProcess.hpp" +#include "RNNoiseFeatureProcessor.hpp" #include "log_macros.h" #include <algorithm> @@ -33,7 +33,7 @@ do { \ } \ } while(0) -RNNoiseProcess::RNNoiseProcess() : +RNNoiseFeatureProcessor::RNNoiseFeatureProcessor() : m_halfWindow(FRAME_SIZE, 0), m_dctTable(NB_BANDS * NB_BANDS), m_analysisMem(FRAME_SIZE, 0), @@ -54,9 +54,9 @@ RNNoiseProcess::RNNoiseProcess() : this->InitTables(); } -void RNNoiseProcess::PreprocessFrame(const float* audioData, - const size_t audioLen, - FrameFeatures& features) +void RNNoiseFeatureProcessor::PreprocessFrame(const float* audioData, + const size_t audioLen, + FrameFeatures& features) { /* Note audioWindow is modified in place */ const arrHp aHp {-1.99599, 0.99600 }; @@ -68,7 +68,7 @@ void RNNoiseProcess::PreprocessFrame(const float* audioData, this->ComputeFrameFeatures(audioWindow, features); } -void RNNoiseProcess::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame) +void RNNoiseFeatureProcessor::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame) { std::vector<float> outputBands = modelOutput; std::vector<float> gain(FREQ_SIZE, 0); @@ -92,7 +92,7 @@ void RNNoiseProcess::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& feat FrameSynthesis(outFrame, features.m_fftX); } -void RNNoiseProcess::InitTables() +void RNNoiseFeatureProcessor::InitTables() { constexpr float pi = M_PI; constexpr float halfPi = M_PI / 2; @@ -111,7 +111,7 @@ void RNNoiseProcess::InitTables() } } -void RNNoiseProcess::BiQuad( +void RNNoiseFeatureProcessor::BiQuad( const arrHp& bHp, const arrHp& aHp, arrHp& memHpX, @@ -126,8 +126,8 @@ void RNNoiseProcess::BiQuad( } } -void RNNoiseProcess::ComputeFrameFeatures(vec1D32F& audioWindow, - FrameFeatures& features) +void RNNoiseFeatureProcessor::ComputeFrameFeatures(vec1D32F& audioWindow, + 
FrameFeatures& features) { this->FrameAnalysis(audioWindow, features.m_fftX, @@ -264,7 +264,7 @@ void RNNoiseProcess::ComputeFrameFeatures(vec1D32F& audioWindow, features.m_featuresVec[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = specVariability / CEPS_MEM - 2.1; } -void RNNoiseProcess::FrameAnalysis( +void RNNoiseFeatureProcessor::FrameAnalysis( const vec1D32F& audioWindow, vec1D32F& fft, vec1D32F& energy, @@ -289,7 +289,7 @@ void RNNoiseProcess::FrameAnalysis( ComputeBandEnergy(fft, energy); } -void RNNoiseProcess::ApplyWindow(vec1D32F& x) +void RNNoiseFeatureProcessor::ApplyWindow(vec1D32F& x) { if (WINDOW_SIZE != x.size()) { printf_err("Invalid size for vector to be windowed\n"); @@ -305,7 +305,7 @@ void RNNoiseProcess::ApplyWindow(vec1D32F& x) } } -void RNNoiseProcess::ForwardTransform( +void RNNoiseFeatureProcessor::ForwardTransform( vec1D32F& x, vec1D32F& fft) { @@ -327,7 +327,7 @@ void RNNoiseProcess::ForwardTransform( * first half of the FFT's. The conjugates are not present. */ } -void RNNoiseProcess::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE) +void RNNoiseFeatureProcessor::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE) { bandE = vec1D32F(NB_BANDS, 0); @@ -351,7 +351,7 @@ void RNNoiseProcess::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE) bandE[NB_BANDS - 1] *= 2; } -void RNNoiseProcess::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC) +void RNNoiseFeatureProcessor::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC) { bandC = vec1D32F(NB_BANDS, 0); VERIFY(this->m_eband5ms.size() >= NB_BANDS); @@ -374,7 +374,7 @@ void RNNoiseProcess::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D bandC[NB_BANDS - 1] *= 2; } -void RNNoiseProcess::DCT(vec1D32F& input, vec1D32F& output) +void RNNoiseFeatureProcessor::DCT(vec1D32F& input, vec1D32F& output) { VERIFY(this->m_dctTable.size() >= NB_BANDS * NB_BANDS); for (uint32_t i = 0; i < NB_BANDS; ++i) { @@ -387,7 +387,7 @@ void RNNoiseProcess::DCT(vec1D32F& input, vec1D32F& output) } } -void RNNoiseProcess::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) { +void RNNoiseFeatureProcessor::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) { for (size_t i = 1; i < (pitchBufSz >> 1); ++i) { pitchBuf[i] = 0.5 * ( 0.5 * (this->m_pitchBuf[2 * i - 1] + this->m_pitchBuf[2 * i + 1]) @@ -431,7 +431,7 @@ void RNNoiseProcess::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) { this->Fir5(lpc2, pitchBufSz >> 1, pitchBuf); } -int RNNoiseProcess::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch) { +int RNNoiseFeatureProcessor::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch) { uint32_t lag = len + maxPitch; vec1D32F xLp4(len >> 2, 0); vec1D32F yLp4(lag >> 2, 0); @@ -488,7 +488,7 @@ int RNNoiseProcess::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32 return 2*bestPitch[0] - offset; } -arrHp RNNoiseProcess::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch) +arrHp RNNoiseFeatureProcessor::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch) { float Syy = 1; arrHp bestNum {-1, -1}; @@ -527,7 +527,7 @@ arrHp RNNoiseProcess::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, return bestPitch; } -int RNNoiseProcess::RemoveDoubling( +int RNNoiseFeatureProcessor::RemoveDoubling( vec1D32F& pitchBuf, uint32_t maxPeriod, uint32_t minPeriod, @@ -679,12 +679,12 @@ int RNNoiseProcess::RemoveDoubling( return this->m_lastPeriod; } -float 
RNNoiseProcess::ComputePitchGain(float xy, float xx, float yy) +float RNNoiseFeatureProcessor::ComputePitchGain(float xy, float xx, float yy) { return xy / math::MathUtils::SqrtF32(1+xx*yy); } -void RNNoiseProcess::AutoCorr( +void RNNoiseFeatureProcessor::AutoCorr( const vec1D32F& x, vec1D32F& ac, size_t lag, @@ -711,7 +711,7 @@ void RNNoiseProcess::AutoCorr( } -void RNNoiseProcess::PitchXCorr( +void RNNoiseFeatureProcessor::PitchXCorr( const vec1D32F& x, const vec1D32F& y, vec1D32F& xCorr, @@ -728,7 +728,7 @@ void RNNoiseProcess::PitchXCorr( } /* Linear predictor coefficients */ -void RNNoiseProcess::LPC( +void RNNoiseFeatureProcessor::LPC( const vec1D32F& correlation, int32_t p, vec1D32F& lpc) @@ -766,7 +766,7 @@ void RNNoiseProcess::LPC( } } -void RNNoiseProcess::Fir5( +void RNNoiseFeatureProcessor::Fir5( const vec1D32F &num, uint32_t N, vec1D32F &x) @@ -794,7 +794,7 @@ void RNNoiseProcess::Fir5( } } -void RNNoiseProcess::PitchFilter(FrameFeatures &features, vec1D32F &gain) { +void RNNoiseFeatureProcessor::PitchFilter(FrameFeatures &features, vec1D32F &gain) { std::vector<float> r(NB_BANDS, 0); std::vector<float> rf(FREQ_SIZE, 0); std::vector<float> newE(NB_BANDS); @@ -835,7 +835,7 @@ void RNNoiseProcess::PitchFilter(FrameFeatures &features, vec1D32F &gain) { } } -void RNNoiseProcess::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) { +void RNNoiseFeatureProcessor::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) { std::vector<float> x(WINDOW_SIZE, 0); InverseTransform(x, fftY); ApplyWindow(x); @@ -845,7 +845,7 @@ void RNNoiseProcess::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) { memcpy((m_synthesisMem.data()), &x[FRAME_SIZE], FRAME_SIZE*sizeof(float)); } -void RNNoiseProcess::InterpBandGain(vec1D32F& g, vec1D32F& bandE) { +void RNNoiseFeatureProcessor::InterpBandGain(vec1D32F& g, vec1D32F& bandE) { for (size_t i = 0; i < NB_BANDS - 1; i++) { int bandSize = (m_eband5ms[i + 1] - m_eband5ms[i]) << FRAME_SIZE_SHIFT; for (int j = 0; j < bandSize; j++) { @@ -855,7 +855,7 @@ void RNNoiseProcess::InterpBandGain(vec1D32F& g, vec1D32F& bandE) { } } -void RNNoiseProcess::InverseTransform(vec1D32F& out, vec1D32F& fftXIn) { +void RNNoiseFeatureProcessor::InverseTransform(vec1D32F& out, vec1D32F& fftXIn) { std::vector<float> x(WINDOW_SIZE * 2); /* This is complex. */ vec1D32F newFFT; /* This is complex. */ diff --git a/source/use_case/noise_reduction/src/RNNoiseProcessing.cc b/source/use_case/noise_reduction/src/RNNoiseProcessing.cc new file mode 100644 index 0000000..f6a3ec4 --- /dev/null +++ b/source/use_case/noise_reduction/src/RNNoiseProcessing.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2022 Arm Limited. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "RNNoiseProcessing.hpp" +#include "log_macros.h" + +namespace arm { +namespace app { + + RNNoisePreProcess::RNNoisePreProcess(TfLiteTensor* inputTensor, + std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor, std::shared_ptr<rnn::FrameFeatures> frameFeatures) + : m_inputTensor{inputTensor}, + m_featureProcessor{featureProcessor}, + m_frameFeatures{frameFeatures} + {} + + bool RNNoisePreProcess::DoPreProcess(const void* data, size_t inputSize) + { + if (data == nullptr) { + printf_err("Data pointer is null"); + return false; + } + + auto input = static_cast<const int16_t*>(data); + this->m_audioFrame = rnn::vec1D32F(input, input + inputSize); + m_featureProcessor->PreprocessFrame(this->m_audioFrame.data(), inputSize, *this->m_frameFeatures); + + QuantizeAndPopulateInput(this->m_frameFeatures->m_featuresVec, + this->m_inputTensor->params.scale, this->m_inputTensor->params.zero_point, + this->m_inputTensor); + + debug("Input tensor populated \n"); + + return true; + } + + void RNNoisePreProcess::QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures, + const float quantScale, const int quantOffset, + TfLiteTensor* inputTensor) + { + const float minVal = std::numeric_limits<int8_t>::min(); + const float maxVal = std::numeric_limits<int8_t>::max(); + + auto* inputTensorData = tflite::GetTensorData<int8_t>(inputTensor); + + for (size_t i=0; i < inputFeatures.size(); ++i) { + float quantValue = ((inputFeatures[i] / quantScale) + quantOffset); + inputTensorData[i] = static_cast<int8_t>(std::min<float>(std::max<float>(quantValue, minVal), maxVal)); + } + } + + RNNoisePostProcess::RNNoisePostProcess(TfLiteTensor* outputTensor, + std::vector<int16_t>& denoisedAudioFrame, + std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor, + std::shared_ptr<rnn::FrameFeatures> frameFeatures) + : m_outputTensor{outputTensor}, + m_denoisedAudioFrame{denoisedAudioFrame}, + m_featureProcessor{featureProcessor}, + m_frameFeatures{frameFeatures} + { + this->m_denoisedAudioFrameFloat.reserve(denoisedAudioFrame.size()); + this->m_modelOutputFloat.resize(outputTensor->bytes); + } + + bool RNNoisePostProcess::DoPostProcess() + { + const auto* outputData = tflite::GetTensorData<int8_t>(this->m_outputTensor); + auto outputQuantParams = GetTensorQuantParams(this->m_outputTensor); + + for (size_t i = 0; i < this->m_outputTensor->bytes; ++i) { + this->m_modelOutputFloat[i] = (static_cast<float>(outputData[i]) - outputQuantParams.offset) + * outputQuantParams.scale; + } + + this->m_featureProcessor->PostProcessFrame(this->m_modelOutputFloat, + *this->m_frameFeatures, this->m_denoisedAudioFrameFloat); + + for (size_t i = 0; i < this->m_denoisedAudioFrame.size(); ++i) { + this->m_denoisedAudioFrame[i] = static_cast<int16_t>( + std::roundf(this->m_denoisedAudioFrameFloat[i])); + } + + return true; + } + +} /* namespace app */ +} /* namespace arm */
\ No newline at end of file diff --git a/source/use_case/noise_reduction/src/UseCaseHandler.cc b/source/use_case/noise_reduction/src/UseCaseHandler.cc index acb8ba7..53bb43e 100644 --- a/source/use_case/noise_reduction/src/UseCaseHandler.cc +++ b/source/use_case/noise_reduction/src/UseCaseHandler.cc @@ -21,12 +21,10 @@ #include "ImageUtils.hpp" #include "InputFiles.hpp" #include "RNNoiseModel.hpp" -#include "RNNoiseProcess.hpp" +#include "RNNoiseFeatureProcessor.hpp" +#include "RNNoiseProcessing.hpp" #include "log_macros.h" -#include <cmath> -#include <algorithm> - namespace arm { namespace app { @@ -36,17 +34,6 @@ namespace app { **/ static void IncrementAppCtxClipIdx(ApplicationContext& ctx); - /** - * @brief Quantize the given features and populate the input Tensor. - * @param[in] inputFeatures Vector of floating point features to quantize. - * @param[in] quantScale Quantization scale for the inputTensor. - * @param[in] quantOffset Quantization offset for the inputTensor. - * @param[in,out] inputTensor TFLite micro tensor to populate. - **/ - static void QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures, - float quantScale, int quantOffset, - TfLiteTensor* inputTensor); - /* Noise reduction inference handler. */ bool NoiseReductionHandler(ApplicationContext& ctx, bool runAll) { @@ -57,7 +44,7 @@ namespace app { size_t memDumpMaxLen = 0; uint8_t* memDumpBaseAddr = nullptr; size_t undefMemDumpBytesWritten = 0; - size_t *pMemDumpBytesWritten = &undefMemDumpBytesWritten; + size_t* pMemDumpBytesWritten = &undefMemDumpBytesWritten; if (ctx.Has("MEM_DUMP_LEN") && ctx.Has("MEM_DUMP_BASE_ADDR") && ctx.Has("MEM_DUMP_BYTE_WRITTEN")) { memDumpMaxLen = ctx.Get<size_t>("MEM_DUMP_LEN"); memDumpBaseAddr = ctx.Get<uint8_t*>("MEM_DUMP_BASE_ADDR"); @@ -74,8 +61,8 @@ namespace app { } /* Populate Pre-Processing related parameters. */ - auto audioParamsWinLen = ctx.Get<uint32_t>("frameLength"); - auto audioParamsWinStride = ctx.Get<uint32_t>("frameStride"); + auto audioFrameLen = ctx.Get<uint32_t>("frameLength"); + auto audioFrameStride = ctx.Get<uint32_t>("frameStride"); auto nrNumInputFeatures = ctx.Get<uint32_t>("numInputFeatures"); TfLiteTensor* inputTensor = model.GetInputTensor(0); @@ -103,7 +90,7 @@ namespace app { if (ctx.Has("featureFileNames")) { audioFileAccessorFunc = ctx.Get<std::function<const char*(const uint32_t)>>("featureFileNames"); } - do{ + do { hal_lcd_clear(COLOR_BLACK); auto startDumpAddress = memDumpBaseAddr + memDumpBytesWritten; @@ -112,32 +99,38 @@ namespace app { /* Creating a sliding window through the audio. */ auto audioDataSlider = audio::SlidingWindow<const int16_t>( audioAccessorFunc(currentIndex), - audioSizeAccessorFunc(currentIndex), audioParamsWinLen, - audioParamsWinStride); + audioSizeAccessorFunc(currentIndex), audioFrameLen, + audioFrameStride); info("Running inference on input feature map %" PRIu32 " => %s\n", currentIndex, audioFileAccessorFunc(currentIndex)); memDumpBytesWritten += DumpDenoisedAudioHeader(audioFileAccessorFunc(currentIndex), - (audioDataSlider.TotalStrides() + 1) * audioParamsWinLen, + (audioDataSlider.TotalStrides() + 1) * audioFrameLen, memDumpBaseAddr + memDumpBytesWritten, memDumpMaxLen - memDumpBytesWritten); - rnn::RNNoiseProcess featureProcessor = rnn::RNNoiseProcess(); - rnn::vec1D32F audioFrame(audioParamsWinLen); - rnn::vec1D32F inputFeatures(nrNumInputFeatures); - rnn::vec1D32F denoisedAudioFrameFloat(audioParamsWinLen); - std::vector<int16_t> denoisedAudioFrame(audioParamsWinLen); + /* Set up pre and post-processing. 
*/ + std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor = + std::make_shared<rnn::RNNoiseFeatureProcessor>(); + std::shared_ptr<rnn::FrameFeatures> frameFeatures = + std::make_shared<rnn::FrameFeatures>(); + + RNNoisePreProcess preProcess = RNNoisePreProcess(inputTensor, featureProcessor, frameFeatures); + + std::vector<int16_t> denoisedAudioFrame(audioFrameLen); + RNNoisePostProcess postProcess = RNNoisePostProcess(outputTensor, denoisedAudioFrame, + featureProcessor, frameFeatures); - std::vector<float> modelOutputFloat(outputTensor->bytes); - rnn::FrameFeatures frameFeatures; bool resetGRU = true; while (audioDataSlider.HasNext()) { const int16_t* inferenceWindow = audioDataSlider.Next(); - audioFrame = rnn::vec1D32F(inferenceWindow, inferenceWindow+audioParamsWinLen); - featureProcessor.PreprocessFrame(audioFrame.data(), audioParamsWinLen, frameFeatures); + if (!preProcess.DoPreProcess(inferenceWindow, audioFrameLen)) { + printf_err("Pre-processing failed."); + return false; + } /* Reset or copy over GRU states first to avoid TFLu memory overlap issues. */ if (resetGRU){ @@ -148,53 +141,35 @@ namespace app { model.CopyGruStates(); } - QuantizeAndPopulateInput(frameFeatures.m_featuresVec, - inputTensor->params.scale, inputTensor->params.zero_point, - inputTensor); - /* Strings for presentation/logging. */ std::string str_inf{"Running inference... "}; /* Display message on the LCD - inference running. */ - hal_lcd_display_text( - str_inf.c_str(), str_inf.size(), - dataPsnTxtInfStartX, dataPsnTxtInfStartY, false); + hal_lcd_display_text(str_inf.c_str(), str_inf.size(), + dataPsnTxtInfStartX, dataPsnTxtInfStartY, false); info("Inference %zu/%zu\n", audioDataSlider.Index() + 1, audioDataSlider.TotalStrides() + 1); /* Run inference over this feature sliding window. */ - profiler.StartProfiling("Inference"); - bool success = model.RunInference(); - profiler.StopProfiling(); - resetGRU = false; - - if (!success) { + if (!RunInference(model, profiler)) { + printf_err("Inference failed."); return false; } + resetGRU = false; - /* De-quantize main model output ready for post-processing. */ - const auto* outputData = tflite::GetTensorData<int8_t>(outputTensor); - auto outputQuantParams = arm::app::GetTensorQuantParams(outputTensor); - - for (size_t i = 0; i < outputTensor->bytes; ++i) { - modelOutputFloat[i] = (static_cast<float>(outputData[i]) - outputQuantParams.offset) - * outputQuantParams.scale; - } - - /* Round and cast the post-processed results for dumping to wav. */ - featureProcessor.PostProcessFrame(modelOutputFloat, frameFeatures, denoisedAudioFrameFloat); - for (size_t i = 0; i < audioParamsWinLen; ++i) { - denoisedAudioFrame[i] = static_cast<int16_t>(std::roundf(denoisedAudioFrameFloat[i])); + /* Carry out post-processing. */ + if (!postProcess.DoPostProcess()) { + printf_err("Post-processing failed."); + return false; } /* Erase. */ str_inf = std::string(str_inf.size(), ' '); - hal_lcd_display_text( - str_inf.c_str(), str_inf.size(), - dataPsnTxtInfStartX, dataPsnTxtInfStartY, false); + hal_lcd_display_text(str_inf.c_str(), str_inf.size(), + dataPsnTxtInfStartX, dataPsnTxtInfStartY, false); if (memDumpMaxLen > 0) { - /* Dump output tensors to memory. */ + /* Dump final post processed output to memory. */ memDumpBytesWritten += DumpOutputDenoisedAudioFrame( denoisedAudioFrame, memDumpBaseAddr + memDumpBytesWritten, @@ -209,6 +184,7 @@ namespace app { valMemDumpBytesWritten, startDumpAddress); } + /* Finish by dumping the footer. 
*/ DumpDenoisedAudioFooter(memDumpBaseAddr + memDumpBytesWritten, memDumpMaxLen - memDumpBytesWritten); info("All inferences for audio clip complete.\n"); @@ -216,15 +192,13 @@ namespace app { IncrementAppCtxClipIdx(ctx); std::string clearString{' '}; - hal_lcd_display_text( - clearString.c_str(), clearString.size(), + hal_lcd_display_text(clearString.c_str(), clearString.size(), dataPsnTxtInfStartX, dataPsnTxtInfStartY, false); std::string completeMsg{"Inference complete!"}; /* Display message on the LCD - inference complete. */ - hal_lcd_display_text( - completeMsg.c_str(), completeMsg.size(), + hal_lcd_display_text(completeMsg.c_str(), completeMsg.size(), dataPsnTxtInfStartX, dataPsnTxtInfStartY, false); } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx); @@ -233,7 +207,7 @@ namespace app { } size_t DumpDenoisedAudioHeader(const char* filename, size_t dumpSize, - uint8_t *memAddress, size_t memSize){ + uint8_t* memAddress, size_t memSize){ if (memAddress == nullptr){ return 0; @@ -284,7 +258,7 @@ namespace app { return numBytesWritten; } - size_t DumpDenoisedAudioFooter(uint8_t *memAddress, size_t memSize){ + size_t DumpDenoisedAudioFooter(uint8_t* memAddress, size_t memSize){ if ((memAddress == nullptr) || (memSize < 4)) { return 0; } @@ -294,8 +268,8 @@ namespace app { return sizeof(int32_t); } - size_t DumpOutputDenoisedAudioFrame(const std::vector<int16_t> &audioFrame, - uint8_t *memAddress, size_t memSize) + size_t DumpOutputDenoisedAudioFrame(const std::vector<int16_t>& audioFrame, + uint8_t* memAddress, size_t memSize) { if (memAddress == nullptr) { return 0; @@ -324,7 +298,7 @@ namespace app { const TfLiteTensor* tensor = model.GetOutputTensor(i); const auto* tData = tflite::GetTensorData<uint8_t>(tensor); #if VERIFY_TEST_OUTPUT - arm::app::DumpTensor(tensor); + DumpTensor(tensor); #endif /* VERIFY_TEST_OUTPUT */ /* Ensure that we don't overflow the allowed limit. */ if (numBytesWritten + tensor->bytes <= memSize) { @@ -360,20 +334,5 @@ namespace app { ctx.Set<uint32_t>("clipIndex", curClipIdx); } - void QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures, - const float quantScale, const int quantOffset, TfLiteTensor* inputTensor) - { - const float minVal = std::numeric_limits<int8_t>::min(); - const float maxVal = std::numeric_limits<int8_t>::max(); - - auto* inputTensorData = tflite::GetTensorData<int8_t>(inputTensor); - - for (size_t i=0; i < inputFeatures.size(); ++i) { - float quantValue = ((inputFeatures[i] / quantScale) + quantOffset); - inputTensorData[i] = static_cast<int8_t>(std::min<float>(std::max<float>(quantValue, minVal), maxVal)); - } - } - - } /* namespace app */ } /* namespace arm */ |
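
For reference, the int8 affine quantisation done by RNNoisePreProcess::QuantizeAndPopulateInput and the matching de-quantisation in RNNoisePostProcess::DoPostProcess reduce to the arithmetic below. This is a standalone illustration, not code from the commit; the scale and zero-point values are invented for the example.

    /*
     * Standalone illustration (not from the commit) of the int8 affine quantisation
     * performed by RNNoisePreProcess::QuantizeAndPopulateInput and the inverse
     * applied in RNNoisePostProcess::DoPostProcess.
     */
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    static int8_t QuantizeToInt8(float value, float scale, int zeroPoint)
    {
        const float minVal = std::numeric_limits<int8_t>::min();
        const float maxVal = std::numeric_limits<int8_t>::max();
        /* Same arithmetic as the pre-processing code: scale, add offset, clamp to int8. */
        const float q = (value / scale) + zeroPoint;
        return static_cast<int8_t>(std::min(std::max(q, minVal), maxVal));
    }

    static float DequantizeFromInt8(int8_t value, float scale, int zeroPoint)
    {
        /* Inverse mapping used on the model output before frame synthesis. */
        return (static_cast<float>(value) - zeroPoint) * scale;
    }

    int main()
    {
        const float scale   = 0.05f;  /* Hypothetical tensor quantisation parameters. */
        const int zeroPoint = -12;

        const float feature = 1.37f;
        const int8_t q = QuantizeToInt8(feature, scale, zeroPoint);
        printf("quantised: %d, recovered: %f\n", q, DequantizeFromInt8(q, scale, zeroPoint));
        return 0;
    }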