diff options
author | Éanna Ó Catháin <eanna.ocathain@arm.com> | 2021-04-07 14:35:25 +0100 |
---|---|---|
committer | Jim Flynn <jim.flynn@arm.com> | 2021-05-07 09:11:52 +0000 |
commit | c6ab02a626e15b4a12fc09ecd844eb8b95380c3c (patch) | |
tree | 9912ed9cdb89cdb24483b22d6621ae30049ae321 /samples/SpeechRecognition/src/AudioCapture.cpp | |
parent | e813d67f86df41a238ff79b5c554ef5027f56576 (diff) | |
download | armnn-c6ab02a626e15b4a12fc09ecd844eb8b95380c3c.tar.gz |
MLECO-1252 ASR sample application using the public ArmNN C++ API.
Change-Id: I98cd505b8772a8c8fa88308121bc94135bb45068
Signed-off-by: Éanna Ó Catháin <eanna.ocathain@arm.com>
Diffstat (limited to 'samples/SpeechRecognition/src/AudioCapture.cpp')
-rw-r--r-- | samples/SpeechRecognition/src/AudioCapture.cpp | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/samples/SpeechRecognition/src/AudioCapture.cpp b/samples/SpeechRecognition/src/AudioCapture.cpp new file mode 100644 index 0000000000..f3b9092218 --- /dev/null +++ b/samples/SpeechRecognition/src/AudioCapture.cpp @@ -0,0 +1,104 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "AudioCapture.hpp" +#include <alsa/asoundlib.h> +#include <sndfile.h> +#include <samplerate.h> + +namespace asr +{ + std::vector<float> AudioCapture::LoadAudioFile(std::string filePath) + { + SF_INFO inputSoundFileInfo; + SNDFILE* infile = NULL; + infile = sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo); + + float audioIn[inputSoundFileInfo.channels * inputSoundFileInfo.frames]; + sf_read_float(infile, audioIn, inputSoundFileInfo.channels * inputSoundFileInfo.frames); + + float sampleRate = 16000.0f; + float srcRatio = sampleRate / (float)inputSoundFileInfo.samplerate; + int outputFrames = ceil(inputSoundFileInfo.frames * srcRatio); + float dataOut[outputFrames]; + + // Convert to mono + float monoData[inputSoundFileInfo.frames]; + for(int i = 0; i < inputSoundFileInfo.frames; i++) + { + float val = 0.0f; + for(int j = 0; j < inputSoundFileInfo.channels; j++) + monoData[i] += audioIn[i * inputSoundFileInfo.channels + j]; + monoData[i] /= inputSoundFileInfo.channels; + } + + // Resample + SRC_DATA srcData; + srcData.data_in = monoData; + srcData.input_frames = inputSoundFileInfo.frames; + srcData.data_out = dataOut; + srcData.output_frames = outputFrames; + srcData.src_ratio = srcRatio; + + src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1); + + // Convert to Vector + std::vector<float> processedInput; + + for(int i = 0; i < srcData.output_frames_gen; ++i) + { + processedInput.push_back(srcData.data_out[i]); + } + + sf_close(infile); + + return processedInput; + } + + void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride) + { + this->m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride); + } + + bool AudioCapture::HasNext() + { + return m_window.HasNext(); + } + + std::vector<float> AudioCapture::Next() + { + if (this->m_window.HasNext()) + { + int remainingData = this->m_window.RemainingData(); + const float* windowData = this->m_window.Next(); + + size_t windowSize = this->m_window.GetWindowSize(); + + if(remainingData < windowSize) + { + std::vector<float> mfccAudioData(windowSize, 0.0f); + for(int i = 0; i < remainingData; ++i) + { + mfccAudioData[i] = *windowData; + if(i < remainingData - 1) + { + ++windowData; + } + } + return mfccAudioData; + } + else + { + std::vector<float> mfccAudioData(windowData, windowData + windowSize); + return mfccAudioData; + } + } + else + { + throw std::out_of_range("Error, end of audio data reached."); + } + } +} //namespace asr + |