1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "AudioCapture.hpp"
#include <alsa/asoundlib.h>
#include <sndfile.h>
#include <samplerate.h>
#include <cmath>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
namespace asr
{
std::vector<float> AudioCapture::LoadAudioFile(std::string filePath)
{
SF_INFO inputSoundFileInfo;
SNDFILE* infile = NULL;
infile = sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo);
float audioIn[inputSoundFileInfo.channels * inputSoundFileInfo.frames];
sf_read_float(infile, audioIn, inputSoundFileInfo.channels * inputSoundFileInfo.frames);
float sampleRate = 16000.0f;
float srcRatio = sampleRate / (float)inputSoundFileInfo.samplerate;
int outputFrames = ceil(inputSoundFileInfo.frames * srcRatio);
float dataOut[outputFrames];
// Convert to mono
float monoData[inputSoundFileInfo.frames];
for(int i = 0; i < inputSoundFileInfo.frames; i++)
{
float val = 0.0f;
for(int j = 0; j < inputSoundFileInfo.channels; j++)
monoData[i] += audioIn[i * inputSoundFileInfo.channels + j];
monoData[i] /= inputSoundFileInfo.channels;
}
// Resample
SRC_DATA srcData;
srcData.data_in = monoData;
srcData.input_frames = inputSoundFileInfo.frames;
srcData.data_out = dataOut;
srcData.output_frames = outputFrames;
srcData.src_ratio = srcRatio;
src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1);
// Convert to Vector
std::vector<float> processedInput;
for(int i = 0; i < srcData.output_frames_gen; ++i)
{
processedInput.push_back(srcData.data_out[i]);
}
sf_close(infile);
return processedInput;
}
void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride)
{
this->m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride);
}
/**
 * @brief Reports whether the sliding window can produce another chunk.
 *
 * @return Whatever m_window.HasNext() returns.
 */
bool AudioCapture::HasNext()
{
    const bool moreAvailable = m_window.HasNext();
    return moreAvailable;
}
std::vector<float> AudioCapture::Next()
{
if (this->m_window.HasNext())
{
int remainingData = this->m_window.RemainingData();
const float* windowData = this->m_window.Next();
size_t windowSize = this->m_window.GetWindowSize();
if(remainingData < windowSize)
{
std::vector<float> mfccAudioData(windowSize, 0.0f);
for(int i = 0; i < remainingData; ++i)
{
mfccAudioData[i] = *windowData;
if(i < remainingData - 1)
{
++windowData;
}
}
return mfccAudioData;
}
else
{
std::vector<float> mfccAudioData(windowData, windowData + windowSize);
return mfccAudioData;
}
}
else
{
throw std::out_of_range("Error, end of audio data reached.");
}
}
} //namespace asr
|