aboutsummaryrefslogtreecommitdiff
path: root/samples/SpeechRecognition/src/AudioCapture.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'samples/SpeechRecognition/src/AudioCapture.cpp')
-rw-r--r--samples/SpeechRecognition/src/AudioCapture.cpp104
1 files changed, 0 insertions, 104 deletions
diff --git a/samples/SpeechRecognition/src/AudioCapture.cpp b/samples/SpeechRecognition/src/AudioCapture.cpp
deleted file mode 100644
index f3b9092218..0000000000
--- a/samples/SpeechRecognition/src/AudioCapture.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "AudioCapture.hpp"
-
-#include <alsa/asoundlib.h>
-#include <samplerate.h>
-#include <sndfile.h>
-
-#include <cmath>
-#include <memory>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-namespace asr
-{
- // Loads an audio file through libsndfile, averages its channels down to mono
- // and resamples the result to 16 kHz with libsamplerate.
- //
- // @param filePath  path of the audio file to read.
- // @return the resampled mono samples; empty when the file reports no frames,
- //         no channels or no sample rate, or when nothing could be read.
- // @throws std::runtime_error if the file cannot be opened or the resampler
- //         reports an error.
- std::vector<float> AudioCapture::LoadAudioFile(std::string filePath)
- {
-     // libsndfile requires SF_INFO::format to be zero when opening for reading.
-     SF_INFO inputSoundFileInfo = {};
-
-     // Own the handle so it is closed on every exit path, including exceptions.
-     std::unique_ptr<SNDFILE, decltype(&sf_close)> infile(
-         sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo), &sf_close);
-     if (!infile)
-     {
-         // sf_strerror(nullptr) reports the reason the most recent open failed.
-         throw std::runtime_error("Failed to open audio file '" + filePath + "': " +
-                                  sf_strerror(nullptr));
-     }
-
-     const sf_count_t channels = inputSoundFileInfo.channels;
-     if (channels <= 0 || inputSoundFileInfo.frames <= 0 || inputSoundFileInfo.samplerate <= 0)
-     {
-         return {};
-     }
-
-     // Heap storage: the sample count is file-dependent and can easily exceed the stack.
-     std::vector<float> audioIn(static_cast<size_t>(channels * inputSoundFileInfo.frames));
-     const sf_count_t itemsRead =
-         sf_read_float(infile.get(), audioIn.data(), static_cast<sf_count_t>(audioIn.size()));
-     infile.reset();
-
-     // Only process the frames that were actually delivered by the reader.
-     const sf_count_t frames = itemsRead / channels;
-     if (frames <= 0)
-     {
-         return {};
-     }
-
-     // Convert to mono by averaging the interleaved channels of each frame.
-     std::vector<float> monoData(static_cast<size_t>(frames));
-     for (sf_count_t frame = 0; frame < frames; ++frame)
-     {
-         float sum = 0.0f;
-         for (sf_count_t channel = 0; channel < channels; ++channel)
-         {
-             sum += audioIn[static_cast<size_t>(frame * channels + channel)];
-         }
-         monoData[static_cast<size_t>(frame)] = sum / static_cast<float>(channels);
-     }
-
-     // Resample
-     const double sampleRate = 16000.0;
-     const double srcRatio = sampleRate / static_cast<double>(inputSoundFileInfo.samplerate);
-     const long outputFrames = static_cast<long>(std::ceil(static_cast<double>(frames) * srcRatio));
-     std::vector<float> dataOut(static_cast<size_t>(outputFrames));
-
-     SRC_DATA srcData = {};
-     srcData.data_in = monoData.data();
-     srcData.input_frames = static_cast<long>(frames);
-     srcData.data_out = dataOut.data();
-     srcData.output_frames = outputFrames;
-     srcData.src_ratio = srcRatio;
-
-     const int srcError = src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1);
-     if (srcError != 0)
-     {
-         throw std::runtime_error(std::string("Failed to resample audio: ") +
-                                  src_strerror(srcError));
-     }
-
-     // Keep only the frames the resampler actually generated.
-     dataOut.resize(static_cast<size_t>(srcData.output_frames_gen));
-     return dataOut;
- }
-
- // Replaces the member sliding window with a new SlidingWindow<const float>
- // built from the given arguments, which are forwarded unchanged.
- //
- // NOTE(review): whether SlidingWindow copies `data` or only keeps the pointer
- // is not visible here - confirm the required lifetime of the buffer.
- void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride)
- {
-     m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride);
- }
-
- // Reports whether the sliding window has another position available,
- // as answered by SlidingWindow::HasNext().
- bool AudioCapture::HasNext()
- {
-     const bool moreWindows = this->m_window.HasNext();
-     return moreWindows;
- }
-
- // Advances the sliding window and returns a copy of the samples at the new
- // position. The returned vector always has GetWindowSize() elements; when
- // fewer samples remain than a full window, the tail is filled with zeros.
- //
- // @throws std::out_of_range if the window has no further position.
- std::vector<float> AudioCapture::Next()
- {
-     if (!m_window.HasNext())
-     {
-         throw std::out_of_range("Error, end of audio data reached.");
-     }
-
-     // Query the remaining count before Next() is called, then fetch the window.
-     const int samplesLeft = m_window.RemainingData();
-     const float* const windowStart = m_window.Next();
-     const size_t fullWindow = m_window.GetWindowSize();
-
-     if (static_cast<size_t>(samplesLeft) >= fullWindow)
-     {
-         // A complete window is available: copy it as-is.
-         return std::vector<float>(windowStart, windowStart + fullWindow);
-     }
-
-     // Partial window: start from an all-zero buffer and overwrite the front.
-     std::vector<float> padded(fullWindow, 0.0f);
-     for (int idx = 0; idx < samplesLeft; ++idx)
-     {
-         padded[idx] = windowStart[idx];
-     }
-     return padded;
- }
-} //namespace asr
-