diff options
Diffstat (limited to 'source/application/main/Mfcc.cc')
-rw-r--r-- | source/application/main/Mfcc.cc | 61 |
1 file changed, 32 insertions, 29 deletions
diff --git a/source/application/main/Mfcc.cc b/source/application/main/Mfcc.cc index bf16159..9ddcb5d 100644 --- a/source/application/main/Mfcc.cc +++ b/source/application/main/Mfcc.cc @@ -44,7 +44,7 @@ namespace audio { m_useHtkMethod(useHtkMethod) {} - std::string MfccParams::Str() + std::string MfccParams::Str() const { char strC[1024]; snprintf(strC, sizeof(strC) - 1, "\n \ @@ -74,7 +74,7 @@ namespace audio { this->_m_params.m_numFbankBins, 0.0); this->_m_windowFunc = std::vector<float>(this->_m_params.m_frameLen); - const float multiplier = 2 * M_PI / this->_m_params.m_frameLen; + const auto multiplier = static_cast<float>(2 * M_PI / this->_m_params.m_frameLen); /* Create window function. */ for (size_t i = 0; i < this->_m_params.m_frameLen; i++) { @@ -88,7 +88,7 @@ namespace audio { void MFCC::Init() { - this->_InitMelFilterBank(); + this->InitMelFilterBank(); } float MFCC::MelScale(const float freq, const bool useHTKMethod) @@ -126,8 +126,8 @@ namespace audio { bool MFCC::ApplyMelFilterBank( std::vector<float>& fftVec, std::vector<std::vector<float>>& melFilterBank, - std::vector<int32_t>& filterBankFilterFirst, - std::vector<int32_t>& filterBankFilterLast, + std::vector<uint32_t>& filterBankFilterFirst, + std::vector<uint32_t>& filterBankFilterLast, std::vector<float>& melEnergies) { const size_t numBanks = melEnergies.size(); @@ -140,11 +140,12 @@ namespace audio { for (size_t bin = 0; bin < numBanks; ++bin) { auto filterBankIter = melFilterBank[bin].begin(); + auto end = melFilterBank[bin].end(); float melEnergy = FLT_MIN; /* Avoid log of zero at later stages */ - int32_t firstIndex = filterBankFilterFirst[bin]; - int32_t lastIndex = filterBankFilterLast[bin]; + const uint32_t firstIndex = filterBankFilterFirst[bin]; + const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1); - for (int i = firstIndex; i <= lastIndex; i++) { + for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; i++) { float energyRep = 
math::MathUtils::SqrtF32(fftVec[i]); melEnergy += (*filterBankIter++ * energyRep); } @@ -157,14 +158,14 @@ namespace audio { void MFCC::ConvertToLogarithmicScale(std::vector<float>& melEnergies) { - for (size_t bin = 0; bin < melEnergies.size(); ++bin) { - melEnergies[bin] = logf(melEnergies[bin]); + for (float& melEnergy : melEnergies) { + melEnergy = logf(melEnergy); } } - void MFCC::_ConvertToPowerSpectrum() + void MFCC::ConvertToPowerSpectrum() { - const uint32_t halfDim = this->_m_params.m_frameLenPadded / 2; + const uint32_t halfDim = this->_m_buffer.size() / 2; /* Handle this special case. */ float firstEnergy = this->_m_buffer[0] * this->_m_buffer[0]; @@ -193,7 +194,7 @@ namespace audio { for (int32_t k = 0, m = 0; k < coefficientCount; k++, m += inputLength) { for (int32_t n = 0; n < inputLength; n++) { dctMatix[m+n] = normalizer * - math::MathUtils::CosineF32((n + 0.5) * angle); + math::MathUtils::CosineF32((n + 0.5f) * angle); } angle += angleIncr; } @@ -214,10 +215,10 @@ namespace audio { return 1.f; } - void MFCC::_InitMelFilterBank() + void MFCC::InitMelFilterBank() { - if (!this->_IsMelFilterBankInited()) { - this->_m_melFilterBank = this->_CreateMelFilterBank(); + if (!this->IsMelFilterBankInited()) { + this->_m_melFilterBank = this->CreateMelFilterBank(); this->_m_dctMatrix = this->CreateDCTMatrix( this->_m_params.m_numFbankBins, this->_m_params.m_numMfccFeatures); @@ -225,17 +226,17 @@ namespace audio { } } - bool MFCC::_IsMelFilterBankInited() + bool MFCC::IsMelFilterBankInited() const { return this->_m_filterBankInitialised; } - void MFCC::_MfccComputePreFeature(const std::vector<int16_t>& audioData) + void MFCC::MfccComputePreFeature(const std::vector<int16_t>& audioData) { - this->_InitMelFilterBank(); + this->InitMelFilterBank(); /* TensorFlow way of normalizing .wav data to (-1, 1). 
*/ - constexpr float normaliser = 1.0/(1<<15); + constexpr float normaliser = 1.0/(1u<<15u); for (size_t i = 0; i < this->_m_params.m_frameLen; i++) { this->_m_frame[i] = static_cast<float>(audioData[i]) * normaliser; } @@ -252,7 +253,7 @@ namespace audio { math::MathUtils::FftF32(this->_m_frame, this->_m_buffer, this->_m_fftInstance); /* Convert to power spectrum. */ - this->_ConvertToPowerSpectrum(); + this->ConvertToPowerSpectrum(); /* Apply mel filterbanks. */ if (!this->ApplyMelFilterBank(this->_m_buffer, @@ -269,7 +270,7 @@ namespace audio { std::vector<float> MFCC::MfccCompute(const std::vector<int16_t>& audioData) { - this->_MfccComputePreFeature(audioData); + this->MfccComputePreFeature(audioData); std::vector<float> mfccOut(this->_m_params.m_numMfccFeatures); @@ -288,7 +289,7 @@ namespace audio { return mfccOut; } - std::vector<std::vector<float>> MFCC::_CreateMelFilterBank() + std::vector<std::vector<float>> MFCC::CreateMelFilterBank() { size_t numFftBins = this->_m_params.m_frameLenPadded / 2; float fftBinWidth = static_cast<float>(this->_m_params.m_samplingFreq) / this->_m_params.m_frameLenPadded; @@ -303,17 +304,18 @@ namespace audio { std::vector<std::vector<float>> melFilterBank( this->_m_params.m_numFbankBins); this->_m_filterBankFilterFirst = - std::vector<int32_t>(this->_m_params.m_numFbankBins); + std::vector<uint32_t>(this->_m_params.m_numFbankBins); this->_m_filterBankFilterLast = - std::vector<int32_t>(this->_m_params.m_numFbankBins); + std::vector<uint32_t>(this->_m_params.m_numFbankBins); for (size_t bin = 0; bin < this->_m_params.m_numFbankBins; bin++) { float leftMel = melLowFreq + bin * melFreqDelta; float centerMel = melLowFreq + (bin + 1) * melFreqDelta; float rightMel = melLowFreq + (bin + 2) * melFreqDelta; - int32_t firstIndex = -1; - int32_t lastIndex = -1; + uint32_t firstIndex = 0; + uint32_t lastIndex = 0; + bool firstIndexFound = false; const float normaliser = this->GetMelFilterBankNormaliser(leftMel, rightMel, 
this->_m_params.m_useHtkMethod); for (size_t i = 0; i < numFftBins; i++) { @@ -330,8 +332,9 @@ namespace audio { } thisBin[i] = weight * normaliser; - if (firstIndex == -1) { + if (!firstIndexFound) { firstIndex = i; + firstIndexFound = true; } lastIndex = i; } @@ -341,7 +344,7 @@ namespace audio { this->_m_filterBankFilterLast[bin] = lastIndex; /* Copy the part we care about. */ - for (int32_t i = firstIndex; i <= lastIndex; i++) { + for (uint32_t i = firstIndex; i <= lastIndex; i++) { melFilterBank[bin].push_back(thisBin[i]); } } |