aboutsummaryrefslogtreecommitdiff
path: root/samples/common/src/Audio/AudioCapture.cpp
blob: 920d7a5233a47522b4c891f09ac606571ead9455 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "AudioCapture.hpp"
#include <alsa/asoundlib.h>
#include <sndfile.h>
#include <samplerate.h>

#include <cmath>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

namespace audio
{
    std::vector<float> AudioCapture::LoadAudioFile(std::string filePath)
    {
        SF_INFO inputSoundFileInfo;
        SNDFILE* infile = nullptr;
        infile = sf_open(filePath.c_str(), SFM_READ, &inputSoundFileInfo);

        float audioIn[inputSoundFileInfo.channels * inputSoundFileInfo.frames];
        sf_read_float(infile, audioIn, inputSoundFileInfo.channels * inputSoundFileInfo.frames);

        float sampleRate = 16000.0f;
        float srcRatio = sampleRate / (float)inputSoundFileInfo.samplerate;
        int outputFrames = ceilf(inputSoundFileInfo.frames * srcRatio);

        // Convert to mono
        std::vector<float> monoData(inputSoundFileInfo.frames);
        for(int i = 0; i < inputSoundFileInfo.frames; i++)
        {
            for(int j = 0; j < inputSoundFileInfo.channels; j++)
                monoData[i] += audioIn[i * inputSoundFileInfo.channels + j];
            monoData[i] /= inputSoundFileInfo.channels;
        }

        // Resample
        SRC_DATA srcData;
        srcData.data_in = monoData.data();
        srcData.input_frames = inputSoundFileInfo.frames;

        std::vector<float> dataOut(outputFrames);
        srcData.data_out = dataOut.data();

        srcData.output_frames = outputFrames;
        srcData.src_ratio = srcRatio;

        src_simple(&srcData, SRC_SINC_BEST_QUALITY, 1);

        sf_close(infile);

        return dataOut;
    }

    void AudioCapture::InitSlidingWindow(float* data, size_t dataSize, int minSamples, size_t stride)
    {
        this->m_window = SlidingWindow<const float>(data, dataSize, minSamples, stride);
    }

    bool AudioCapture::HasNext()
    {
        return m_window.HasNext();
    }

    std::vector<float> AudioCapture::Next()
    {
        if (this->m_window.HasNext())
        {
            int remainingData = this->m_window.RemainingData();
            const float* windowData = this->m_window.Next();

            size_t windowSize = this->m_window.GetWindowSize();

            if(remainingData < windowSize)
            {
                std::vector<float> audioData(windowSize, 0.0f);
                for(int i = 0; i < remainingData; ++i)
                {
                    audioData[i] = *windowData;
                    if(i < remainingData - 1)
                    {
                        ++windowData;
                    }
                }
                return audioData;
            }
            else
            {
                std::vector<float> audioData(windowData, windowData + windowSize);
                return audioData;
            }
        }
        else
        {
            throw std::out_of_range("Error, end of audio data reached.");
        }
    }
} // namespace audio