summary refs log tree commit diff
path: root/source/application/main/Mfcc.cc
diff options
context:
space:
mode:
Diffstat (limited to 'source/application/main/Mfcc.cc')
-rw-r--r-- source/application/main/Mfcc.cc 61
1 files changed, 32 insertions, 29 deletions
diff --git a/source/application/main/Mfcc.cc b/source/application/main/Mfcc.cc
index bf16159..9ddcb5d 100644
--- a/source/application/main/Mfcc.cc
+++ b/source/application/main/Mfcc.cc
@@ -44,7 +44,7 @@ namespace audio {
m_useHtkMethod(useHtkMethod)
{}
- std::string MfccParams::Str()
+ std::string MfccParams::Str() const
{
char strC[1024];
snprintf(strC, sizeof(strC) - 1, "\n \
@@ -74,7 +74,7 @@ namespace audio {
this->_m_params.m_numFbankBins, 0.0);
this->_m_windowFunc = std::vector<float>(this->_m_params.m_frameLen);
- const float multiplier = 2 * M_PI / this->_m_params.m_frameLen;
+ const auto multiplier = static_cast<float>(2 * M_PI / this->_m_params.m_frameLen);
/* Create window function. */
for (size_t i = 0; i < this->_m_params.m_frameLen; i++) {
@@ -88,7 +88,7 @@ namespace audio {
void MFCC::Init()
{
- this->_InitMelFilterBank();
+ this->InitMelFilterBank();
}
float MFCC::MelScale(const float freq, const bool useHTKMethod)
@@ -126,8 +126,8 @@ namespace audio {
bool MFCC::ApplyMelFilterBank(
std::vector<float>& fftVec,
std::vector<std::vector<float>>& melFilterBank,
- std::vector<int32_t>& filterBankFilterFirst,
- std::vector<int32_t>& filterBankFilterLast,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
std::vector<float>& melEnergies)
{
const size_t numBanks = melEnergies.size();
@@ -140,11 +140,12 @@ namespace audio {
for (size_t bin = 0; bin < numBanks; ++bin) {
auto filterBankIter = melFilterBank[bin].begin();
+ auto end = melFilterBank[bin].end();
float melEnergy = FLT_MIN; /* Avoid log of zero at later stages */
- int32_t firstIndex = filterBankFilterFirst[bin];
- int32_t lastIndex = filterBankFilterLast[bin];
+ const uint32_t firstIndex = filterBankFilterFirst[bin];
+ const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
- for (int i = firstIndex; i <= lastIndex; i++) {
+ for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; i++) {
float energyRep = math::MathUtils::SqrtF32(fftVec[i]);
melEnergy += (*filterBankIter++ * energyRep);
}
@@ -157,14 +158,14 @@ namespace audio {
void MFCC::ConvertToLogarithmicScale(std::vector<float>& melEnergies)
{
- for (size_t bin = 0; bin < melEnergies.size(); ++bin) {
- melEnergies[bin] = logf(melEnergies[bin]);
+ for (float& melEnergy : melEnergies) {
+ melEnergy = logf(melEnergy);
}
}
- void MFCC::_ConvertToPowerSpectrum()
+ void MFCC::ConvertToPowerSpectrum()
{
- const uint32_t halfDim = this->_m_params.m_frameLenPadded / 2;
+ const uint32_t halfDim = this->_m_buffer.size() / 2;
/* Handle this special case. */
float firstEnergy = this->_m_buffer[0] * this->_m_buffer[0];
@@ -193,7 +194,7 @@ namespace audio {
for (int32_t k = 0, m = 0; k < coefficientCount; k++, m += inputLength) {
for (int32_t n = 0; n < inputLength; n++) {
dctMatix[m+n] = normalizer *
- math::MathUtils::CosineF32((n + 0.5) * angle);
+ math::MathUtils::CosineF32((n + 0.5f) * angle);
}
angle += angleIncr;
}
@@ -214,10 +215,10 @@ namespace audio {
return 1.f;
}
- void MFCC::_InitMelFilterBank()
+ void MFCC::InitMelFilterBank()
{
- if (!this->_IsMelFilterBankInited()) {
- this->_m_melFilterBank = this->_CreateMelFilterBank();
+ if (!this->IsMelFilterBankInited()) {
+ this->_m_melFilterBank = this->CreateMelFilterBank();
this->_m_dctMatrix = this->CreateDCTMatrix(
this->_m_params.m_numFbankBins,
this->_m_params.m_numMfccFeatures);
@@ -225,17 +226,17 @@ namespace audio {
}
}
- bool MFCC::_IsMelFilterBankInited()
+ bool MFCC::IsMelFilterBankInited() const
{
return this->_m_filterBankInitialised;
}
- void MFCC::_MfccComputePreFeature(const std::vector<int16_t>& audioData)
+ void MFCC::MfccComputePreFeature(const std::vector<int16_t>& audioData)
{
- this->_InitMelFilterBank();
+ this->InitMelFilterBank();
/* TensorFlow way of normalizing .wav data to (-1, 1). */
- constexpr float normaliser = 1.0/(1<<15);
+ constexpr float normaliser = 1.0/(1u<<15u);
for (size_t i = 0; i < this->_m_params.m_frameLen; i++) {
this->_m_frame[i] = static_cast<float>(audioData[i]) * normaliser;
}
@@ -252,7 +253,7 @@ namespace audio {
math::MathUtils::FftF32(this->_m_frame, this->_m_buffer, this->_m_fftInstance);
/* Convert to power spectrum. */
- this->_ConvertToPowerSpectrum();
+ this->ConvertToPowerSpectrum();
/* Apply mel filterbanks. */
if (!this->ApplyMelFilterBank(this->_m_buffer,
@@ -269,7 +270,7 @@ namespace audio {
std::vector<float> MFCC::MfccCompute(const std::vector<int16_t>& audioData)
{
- this->_MfccComputePreFeature(audioData);
+ this->MfccComputePreFeature(audioData);
std::vector<float> mfccOut(this->_m_params.m_numMfccFeatures);
@@ -288,7 +289,7 @@ namespace audio {
return mfccOut;
}
- std::vector<std::vector<float>> MFCC::_CreateMelFilterBank()
+ std::vector<std::vector<float>> MFCC::CreateMelFilterBank()
{
size_t numFftBins = this->_m_params.m_frameLenPadded / 2;
float fftBinWidth = static_cast<float>(this->_m_params.m_samplingFreq) / this->_m_params.m_frameLenPadded;
@@ -303,17 +304,18 @@ namespace audio {
std::vector<std::vector<float>> melFilterBank(
this->_m_params.m_numFbankBins);
this->_m_filterBankFilterFirst =
- std::vector<int32_t>(this->_m_params.m_numFbankBins);
+ std::vector<uint32_t>(this->_m_params.m_numFbankBins);
this->_m_filterBankFilterLast =
- std::vector<int32_t>(this->_m_params.m_numFbankBins);
+ std::vector<uint32_t>(this->_m_params.m_numFbankBins);
for (size_t bin = 0; bin < this->_m_params.m_numFbankBins; bin++) {
float leftMel = melLowFreq + bin * melFreqDelta;
float centerMel = melLowFreq + (bin + 1) * melFreqDelta;
float rightMel = melLowFreq + (bin + 2) * melFreqDelta;
- int32_t firstIndex = -1;
- int32_t lastIndex = -1;
+ uint32_t firstIndex = 0;
+ uint32_t lastIndex = 0;
+ bool firstIndexFound = false;
const float normaliser = this->GetMelFilterBankNormaliser(leftMel, rightMel, this->_m_params.m_useHtkMethod);
for (size_t i = 0; i < numFftBins; i++) {
@@ -330,8 +332,9 @@ namespace audio {
}
thisBin[i] = weight * normaliser;
- if (firstIndex == -1) {
+ if (!firstIndexFound) {
firstIndex = i;
+ firstIndexFound = true;
}
lastIndex = i;
}
@@ -341,7 +344,7 @@ namespace audio {
this->_m_filterBankFilterLast[bin] = lastIndex;
/* Copy the part we care about. */
- for (int32_t i = firstIndex; i <= lastIndex; i++) {
+ for (uint32_t i = firstIndex; i <= lastIndex; i++) {
melFilterBank[bin].push_back(thisBin[i]);
}
}