summaryrefslogtreecommitdiff
path: root/source/use_case/ad/src/AdMelSpectrogram.cc
blob: 183c05cc96808e14542647f9cd0736dae1128922 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "AdMelSpectrogram.hpp"

#include "PlatformMath.hpp"

namespace arm {
namespace app {
namespace audio {

    /**
     * @brief       Applies the mel filter bank to a spectrum, producing one
     *              energy value per filter.
     *
     *              For every filter `bin`, the weights stored in
     *              melFilterBank[bin] are multiplied element-wise with
     *              fftVec[first..last] (inclusive range given by
     *              filterBankFilterFirst[bin] and filterBankFilterLast[bin])
     *              and accumulated on top of a small positive floor.
     *
     * @param[in]   fftVec                  Spectrum values addressed by the
     *                                      per-filter index ranges.
     * @param[in]   melFilterBank           Per-filter weights; entry `bin` must
     *                                      hold at least (last - first + 1)
     *                                      weights.
     * @param[in]   filterBankFilterFirst   First fftVec index used by each filter.
     * @param[in]   filterBankFilterLast    Last fftVec index (inclusive) used by
     *                                      each filter.
     * @param[out]  melEnergies             Receives one energy per filter; its
     *                                      size on entry defines the number of
     *                                      filters processed.
     * @return      true on success. false if the container sizes are
     *              inconsistent or if a filter's index range would read outside
     *              fftVec or outside that filter's weights; in the latter case
     *              the energies of earlier filters have already been written.
     */
    bool AdMelSpectrogram::ApplyMelFilterBank(
            std::vector<float>&                 fftVec,
            std::vector<std::vector<float>>&    melFilterBank,
            std::vector<int32_t>&               filterBankFilterFirst,
            std::vector<int32_t>&               filterBankFilterLast,
            std::vector<float>&                 melEnergies)
    {
        const size_t numBanks = melEnergies.size();

        /* melFilterBank is indexed with the same bin counter as the index
         * vectors, so it must provide at least numBanks entries as well. */
        if (numBanks != filterBankFilterFirst.size() ||
            numBanks != filterBankFilterLast.size() ||
            numBanks > melFilterBank.size()) {
            printf_err("unexpected filter bank lengths\n");
            return false;
        }

        const size_t numFftValues = fftVec.size();

        for (size_t bin = 0; bin < numBanks; ++bin) {
            const std::vector<float>& weights = melFilterBank[bin];
            const int32_t firstIndex = filterBankFilterFirst[bin];
            const int32_t lastIndex = filterBankFilterLast[bin];
            float melEnergy = 1e-10; /* Avoid log of zero at later stages. */

            /* An empty range (first > last) contributes nothing and leaves
             * only the floor value, exactly as an unentered loop would. */
            if (firstIndex <= lastIndex) {
                /* std::vector::operator[] performs no bounds checking, so
                 * reject ranges that would read outside either container.
                 * The subtraction is safe: firstIndex >= 0 is tested first. */
                if (firstIndex < 0 ||
                    static_cast<size_t>(lastIndex) >= numFftValues ||
                    static_cast<size_t>(lastIndex - firstIndex) >= weights.size()) {
                    printf_err("filter bank indices out of range\n");
                    return false;
                }

                const size_t first = static_cast<size_t>(firstIndex);
                const size_t numWeights =
                    static_cast<size_t>(lastIndex - firstIndex) + 1;

                for (size_t k = 0; k < numWeights; ++k) {
                    melEnergy += (weights[k] * fftVec[first + k]);
                }
            }

            melEnergies[bin] = melEnergy;
        }

        return true;
    }

    /**
     * @brief           Replaces each mel energy with its scaled logarithm,
     *                  in place.
     *
     *                  The logarithms are obtained from
     *                  math::MathUtils::VecLogarithmF32 (natural logarithm,
     *                  going by the helper's use here) and then multiplied by
     *                  10 * log10(e), i.e. converted to base 10 and scaled
     *                  by a further factor of 10.
     *
     * @param[in,out]   melEnergies   Mel energies on entry; overwritten
     *                                element-by-element with the scaled
     *                                logarithmic values.
     */
    void AdMelSpectrogram::ConvertToLogarithmicScale(
            std::vector<float>& melEnergies)
    {
        /* Combined factor: 10 (default scalar) times log10(e), the latter
         * turning a natural logarithm into a base-10 logarithm. */
        constexpr float log10Scale = 10.0 * 0.4342944819032518;

        /* Scratch buffer receiving the logarithm of every energy. */
        std::vector<float> logValues(melEnergies.size(), 0.f);
        math::MathUtils::VecLogarithmF32(melEnergies, logValues);

        /* Write the scaled results back over the input energies. */
        for (size_t idx = 0; idx < melEnergies.size(); ++idx) {
            melEnergies[idx] = logValues[idx] * log10Scale;
        }
    }

    /**
     * @brief       Computes the normalisation factor for a mel filter
     *              (Slaney normalization for mel weights).
     *
     * @param[in]   leftMel         Mel value of the filter's left edge.
     * @param[in]   rightMel        Mel value of the filter's right edge.
     * @param[in]   useHTKMethod    Forwarded unchanged to InverseMelScale.
     * @return      2 divided by the difference between the inverse-mel-scaled
     *              right and left edges. No check is made for the two edges
     *              mapping to the same value.
     */
    float AdMelSpectrogram::GetMelFilterBankNormaliser(
            const float&    leftMel,
            const float&    rightMel,
            const bool      useHTKMethod)
    {
        /* Map both filter edges back through the inverse mel scale. */
        const auto upperEdge = AdMelSpectrogram::InverseMelScale(rightMel, useHTKMethod);
        const auto lowerEdge = AdMelSpectrogram::InverseMelScale(leftMel, useHTKMethod);

        /* Width of the filter after the inverse mapping. */
        const auto bandWidth = upperEdge - lowerEdge;

        return (2.0f / bandWidth);
    }

} /* namespace audio */
} /* namespace app */
} /* namespace arm */