diff options
Diffstat (limited to 'samples/SpeechRecognition/src/AudioCapture.cpp')
-rw-r--r-- | samples/SpeechRecognition/src/AudioCapture.cpp | 104 |
1 files changed, 0 insertions, 104 deletions
diff --git a/samples/SpeechRecognition/src/AudioCapture.cpp b/samples/SpeechRecognition/src/AudioCapture.cpp deleted file mode 100644 index f3b9092218..0000000000 --- a/samples/SpeechRecognition/src/AudioCapture.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "AudioCapture.hpp" -#include <alsa/asoundlib.h> -#include <sndfile.h> -#include <samplerate.h> - -namespace asr -{ - std::vector<float> AudioCapture::LoadAudioFile(std::string filePath) - { - SF_INFO inputSoundFileInfo; - SNDFILE* infile = NULL; - infile = sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo); - - float audioIn[inputSoundFileInfo.channels * inputSoundFileInfo.frames]; - sf_read_float(infile, audioIn, inputSoundFileInfo.channels * inputSoundFileInfo.frames); - - float sampleRate = 16000.0f; - float srcRatio = sampleRate / (float)inputSoundFileInfo.samplerate; - int outputFrames = ceil(inputSoundFileInfo.frames * srcRatio); - float dataOut[outputFrames]; - - // Convert to mono - float monoData[inputSoundFileInfo.frames]; - for(int i = 0; i < inputSoundFileInfo.frames; i++) - { - float val = 0.0f; - for(int j = 0; j < inputSoundFileInfo.channels; j++) - monoData[i] += audioIn[i * inputSoundFileInfo.channels + j]; - monoData[i] /= inputSoundFileInfo.channels; - } - - // Resample - SRC_DATA srcData; - srcData.data_in = monoData; - srcData.input_frames = inputSoundFileInfo.frames; - srcData.data_out = dataOut; - srcData.output_frames = outputFrames; - srcData.src_ratio = srcRatio; - - src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1); - - // Convert to Vector - std::vector<float> processedInput; - - for(int i = 0; i < srcData.output_frames_gen; ++i) - { - processedInput.push_back(srcData.data_out[i]); - } - - sf_close(infile); - - return processedInput; - } - - void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride) - { - this->m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride); - } - - bool AudioCapture::HasNext() - { - return m_window.HasNext(); - } - - std::vector<float> AudioCapture::Next() - { - if (this->m_window.HasNext()) - { - int remainingData = this->m_window.RemainingData(); - const float* windowData = this->m_window.Next(); - - size_t windowSize = this->m_window.GetWindowSize(); - - if(remainingData < windowSize) - { - std::vector<float> mfccAudioData(windowSize, 0.0f); - for(int i = 0; i < remainingData; ++i) - { - mfccAudioData[i] = *windowData; - if(i < remainingData - 1) - { - ++windowData; - } - } - return mfccAudioData; - } - else - { - std::vector<float> mfccAudioData(windowData, windowData + windowSize); - return mfccAudioData; - } - } - else - { - throw std::out_of_range("Error, end of audio data reached."); - } - } -} //namespace asr - |