path: root/source/use_case/kws_asr
Diffstat (limited to 'source/use_case/kws_asr')
-rw-r--r--  source/use_case/kws_asr/include/AsrClassifier.hpp         |  66
-rw-r--r--  source/use_case/kws_asr/include/AsrResult.hpp             |  63
-rw-r--r--  source/use_case/kws_asr/include/KwsProcessing.hpp         | 138
-rw-r--r--  source/use_case/kws_asr/include/KwsResult.hpp             |  63
-rw-r--r--  source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp       |  51
-rw-r--r--  source/use_case/kws_asr/include/MicroNetKwsModel.hpp      |  66
-rw-r--r--  source/use_case/kws_asr/include/OutputDecode.hpp          |  40
-rw-r--r--  source/use_case/kws_asr/include/Wav2LetterMfcc.hpp        | 113
-rw-r--r--  source/use_case/kws_asr/include/Wav2LetterModel.hpp       |  71
-rw-r--r--  source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp | 108
-rw-r--r--  source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp  | 182
-rw-r--r--  source/use_case/kws_asr/src/AsrClassifier.cc              | 136
-rw-r--r--  source/use_case/kws_asr/src/KwsProcessing.cc              | 212
-rw-r--r--  source/use_case/kws_asr/src/MainLoop.cc                   |  46
-rw-r--r--  source/use_case/kws_asr/src/MicroNetKwsModel.cc           |  63
-rw-r--r--  source/use_case/kws_asr/src/OutputDecode.cc               |  47
-rw-r--r--  source/use_case/kws_asr/src/UseCaseHandler.cc             |   3
-rw-r--r--  source/use_case/kws_asr/src/Wav2LetterMfcc.cc             | 141
-rw-r--r--  source/use_case/kws_asr/src/Wav2LetterModel.cc            |  61
-rw-r--r--  source/use_case/kws_asr/src/Wav2LetterPostprocess.cc      | 214
-rw-r--r--  source/use_case/kws_asr/src/Wav2LetterPreprocess.cc       | 208
-rw-r--r--  source/use_case/kws_asr/usecase.cmake                     |   4
22 files changed, 48 insertions, 2048 deletions
diff --git a/source/use_case/kws_asr/include/AsrClassifier.hpp b/source/use_case/kws_asr/include/AsrClassifier.hpp
deleted file mode 100644
index 6ab9685..0000000
--- a/source/use_case/kws_asr/include/AsrClassifier.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_CLASSIFIER_HPP
-#define ASR_CLASSIFIER_HPP
-
-#include "Classifier.hpp"
-
-namespace arm {
-namespace app {
-
- class AsrClassifier : public Classifier {
- public:
- /**
- * @brief Gets the top N classification results from the
- * output vector.
- * @param[in] outputTensor Inference output tensor from an NN model.
- * @param[out] vecResults A vector of classification results
- * populated by this function.
- * @param[in] labels Labels vector to match classified classes
- * @param[in] topNCount Number of top classifications to pick.
- * @param[in] use_softmax Whether softmax scaling should be applied to model output.
- * @return true if successful, false otherwise.
- **/
- bool GetClassificationResults(
- TfLiteTensor* outputTensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, uint32_t topNCount,
- bool use_softmax = false) override;
-
- private:
-
- /**
- * @brief Utility function that gets the top 1 classification results from the
- * output tensor (vector of vector).
- * @param[in] tensor Inference output tensor from an NN model.
- * @param[out] vecResults A vector of classification results
- * populated by this function.
- * @param[in] labels Labels vector to match classified classes.
- * @param[in] scale Quantization scale.
- * @param[in] zeroPoint Quantization zero point.
- * @return true if successful, false otherwise.
- **/
- template<typename T>
- bool GetTopResults(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint);
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_CLASSIFIER_HPP */
\ No newline at end of file
diff --git a/source/use_case/kws_asr/include/AsrResult.hpp b/source/use_case/kws_asr/include/AsrResult.hpp
deleted file mode 100644
index 25fa9e8..0000000
--- a/source/use_case/kws_asr/include/AsrResult.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_RESULT_HPP
-#define ASR_RESULT_HPP
-
-#include "ClassificationResult.hpp"
-
-#include <vector>
-
-namespace arm {
-namespace app {
-namespace asr {
-
- using ResultVec = std::vector<arm::app::ClassificationResult>;
-
- /* Structure for holding asr result. */
- class AsrResult {
-
- public:
- ResultVec m_resultVec; /* Container for "thresholded" classification results. */
- float m_timeStamp; /* Audio timestamp for this result. */
- uint32_t m_inferenceNumber; /* Corresponding inference number. */
- float m_threshold; /* Threshold value for `m_resultVec` */
-
- AsrResult() = delete;
- AsrResult(ResultVec& resultVec,
- const float timestamp,
- const uint32_t inferenceIdx,
- const float scoreThreshold) {
-
- this->m_threshold = scoreThreshold;
- this->m_timeStamp = timestamp;
- this->m_inferenceNumber = inferenceIdx;
-
- this->m_resultVec = ResultVec();
- for (auto& i : resultVec) {
- if (i.m_normalisedVal >= this->m_threshold) {
- this->m_resultVec.emplace_back(i);
- }
- }
- }
- ~AsrResult() = default;
- };
-
-} /* namespace asr */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_RESULT_HPP */
\ No newline at end of file
diff --git a/source/use_case/kws_asr/include/KwsProcessing.hpp b/source/use_case/kws_asr/include/KwsProcessing.hpp
deleted file mode 100644
index d3de3b3..0000000
--- a/source/use_case/kws_asr/include/KwsProcessing.hpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_PROCESSING_HPP
-#define KWS_PROCESSING_HPP
-
-#include <AudioUtils.hpp>
-#include "BaseProcessing.hpp"
-#include "Model.hpp"
-#include "Classifier.hpp"
-#include "MicroNetKwsMfcc.hpp"
-
-#include <functional>
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for Keyword Spotting use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class KwsPreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] numFeatures How many MFCC features to use.
- * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
- * for an inference.
- * @param[in] mfccFrameLength Number of audio samples used to calculate one set of MFCC values when
- * sliding a window through the audio sample.
- * @param[in] mfccFrameStride Number of audio samples between consecutive windows.
- **/
- explicit KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numFeatureFrames,
- int mfccFrameLength, int mfccFrameStride);
-
- /**
- * @brief Should perform pre-processing of 'raw' input audio data and load it into
- * TFLite Micro input tensors ready for inference.
- * @param[in] input Pointer to the data that pre-processing will work on.
- * @param[in] inputSize Size of the input data.
- * @return true if successful, false otherwise.
- **/
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- size_t m_audioWindowIndex = 0; /* Index of audio slider, used when caching features in longer clips. */
- size_t m_audioDataWindowSize; /* Amount of audio needed for 1 inference. */
- size_t m_audioDataStride; /* Amount of audio to stride across if doing >1 inference in longer clips. */
-
- private:
- TfLiteTensor* m_inputTensor; /* Model input tensor. */
- const int m_mfccFrameLength;
- const int m_mfccFrameStride;
- const size_t m_numMfccFrames; /* How many sets of m_numMfccFeats. */
-
- audio::MicroNetKwsMFCC m_mfcc;
- audio::SlidingWindow<const int16_t> m_mfccSlidingWindow;
- size_t m_numMfccVectorsInAudioStride;
- size_t m_numReusedMfccVectors;
- std::function<void (std::vector<int16_t>&, int, bool, size_t)> m_mfccFeatureCalculator;
-
- /**
- * @brief Returns a function to perform feature calculation and populates input tensor data with
- * MFCC data.
- *
- * Input tensor data type check is performed to choose correct MFCC feature data type.
- * If tensor has an integer data type then original features are quantised.
- *
- * Warning: MFCC calculator provided as input must have the same life scope as returned function.
- *
- * @param[in] mfcc MFCC feature calculator.
- * @param[in,out] inputTensor Input tensor pointer to store calculated features.
- * @param[in] cacheSize Size of the feature vectors cache (number of feature vectors).
- * @return Function to be called providing audio sample and sliding window index.
- */
- std::function<void (std::vector<int16_t>&, int, bool, size_t)>
- GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc,
- TfLiteTensor* inputTensor,
- size_t cacheSize);
-
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
- FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute);
- };
-
- /**
- * @brief Post-processing class for Keyword Spotting use case.
- * Implements methods declared by BasePostProcess and anything else needed
- * to populate result vector.
- */
- class KwsPostProcess : public BasePostProcess {
-
- private:
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- Classifier& m_kwsClassifier; /* KWS Classifier object. */
- const std::vector<std::string>& m_labels; /* KWS Labels. */
- std::vector<ClassificationResult>& m_results; /* Results vector for a single inference. */
-
- public:
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Classifier object used to get top N results from classification.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[in/out] results Vector of classification results to store decoded outputs.
- **/
- KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate KWS result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_PROCESSING_HPP */
\ No newline at end of file
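For context, the two classes declared above are normally wired around a single inference call. The sketch below is illustrative only; `model`, `classifier`, `labels`, the audio pointers and `RunInference()` stand in for objects owned by the use-case handler and are assumptions here, not part of this change.

    arm::app::KwsPreProcess preProcess(model.GetInputTensor(0),
                                       numMfccFeatures, numMfccFrames,
                                       mfccFrameLength, mfccFrameStride);

    std::vector<arm::app::ClassificationResult> singleInfResult;
    arm::app::KwsPostProcess postProcess(model.GetOutputTensor(0), classifier,
                                         labels, singleInfResult);

    /* Fill the input tensor, run the network, then decode the output. */
    if (preProcess.DoPreProcess(audioData, audioDataLen) &&
        model.RunInference() &&
        postProcess.DoPostProcess()) {
        /* singleInfResult now holds the top-1 keyword for this window. */
    }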
diff --git a/source/use_case/kws_asr/include/KwsResult.hpp b/source/use_case/kws_asr/include/KwsResult.hpp
deleted file mode 100644
index 45bb790..0000000
--- a/source/use_case/kws_asr/include/KwsResult.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_RESULT_HPP
-#define KWS_RESULT_HPP
-
-#include "ClassificationResult.hpp"
-
-#include <vector>
-
-namespace arm {
-namespace app {
-namespace kws {
-
- using ResultVec = std::vector < arm::app::ClassificationResult >;
-
- /* Structure for holding kws result. */
- class KwsResult {
-
- public:
- ResultVec m_resultVec; /* Container for "thresholded" classification results. */
- float m_timeStamp; /* Audio timestamp for this result. */
- uint32_t m_inferenceNumber; /* Corresponding inference number. */
- float m_threshold; /* Threshold value for `m_resultVec.` */
-
- KwsResult() = delete;
- KwsResult(ResultVec& resultVec,
- const float timestamp,
- const uint32_t inferenceIdx,
- const float scoreThreshold) {
-
- this->m_threshold = scoreThreshold;
- this->m_timeStamp = timestamp;
- this->m_inferenceNumber = inferenceIdx;
-
- this->m_resultVec = ResultVec();
- for (auto & i : resultVec) {
- if (i.m_normalisedVal >= this->m_threshold) {
- this->m_resultVec.emplace_back(i);
- }
- }
- }
- ~KwsResult() = default;
- };
-
-} /* namespace kws */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_RESULT_HPP */
\ No newline at end of file
diff --git a/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp b/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp
deleted file mode 100644
index af6ba5f..0000000
--- a/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_MICRONET_MFCC_HPP
-#define KWS_ASR_MICRONET_MFCC_HPP
-
-#include "Mfcc.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Class to provide MicroNet specific MFCC calculation requirements. */
- class MicroNetKwsMFCC : public MFCC {
-
- public:
- static constexpr uint32_t ms_defaultSamplingFreq = 16000;
- static constexpr uint32_t ms_defaultNumFbankBins = 40;
- static constexpr uint32_t ms_defaultMelLoFreq = 20;
- static constexpr uint32_t ms_defaultMelHiFreq = 4000;
- static constexpr bool ms_defaultUseHtkMethod = true;
-
-
- explicit MicroNetKwsMFCC(const size_t numFeats, const size_t frameLen)
- : MFCC(MfccParams(
- ms_defaultSamplingFreq, ms_defaultNumFbankBins,
- ms_defaultMelLoFreq, ms_defaultMelHiFreq,
- numFeats, frameLen, ms_defaultUseHtkMethod))
- {}
- MicroNetKwsMFCC() = delete;
- ~MicroNetKwsMFCC() = default;
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_MICRONET_MFCC_HPP */
diff --git a/source/use_case/kws_asr/include/MicroNetKwsModel.hpp b/source/use_case/kws_asr/include/MicroNetKwsModel.hpp
deleted file mode 100644
index 22cf916..0000000
--- a/source/use_case/kws_asr/include/MicroNetKwsModel.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_MICRONETMODEL_HPP
-#define KWS_ASR_MICRONETMODEL_HPP
-
-#include "Model.hpp"
-
-namespace arm {
-namespace app {
-namespace kws {
- extern const int g_FrameLength;
- extern const int g_FrameStride;
- extern const float g_ScoreThreshold;
- extern const uint32_t g_NumMfcc;
- extern const uint32_t g_NumAudioWins;
-} /* namespace kws */
-} /* namespace app */
-} /* namespace arm */
-
-namespace arm {
-namespace app {
- class MicroNetKwsModel : public Model {
- public:
- /* Indices for the expected model - based on input and output tensor shapes */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_outputRowsIdx = 2;
- static constexpr uint32_t ms_outputColsIdx = 3;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 7;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_MICRONETMODEL_HPP */
diff --git a/source/use_case/kws_asr/include/OutputDecode.hpp b/source/use_case/kws_asr/include/OutputDecode.hpp
deleted file mode 100644
index cea2c33..0000000
--- a/source/use_case/kws_asr/include/OutputDecode.hpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_OUTPUT_DECODE_HPP
-#define KWS_ASR_OUTPUT_DECODE_HPP
-
-#include "AsrClassifier.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-namespace asr {
-
- /**
- * @brief Decodes a vector of classification results into a text string,
- * collapsing repeated labels and dropping the "$" placeholder.
- * @param[in] vecResults Label output from classifier.
- * @return Decoded output as a string.
- **/
- std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults);
-
-} /* namespace asr */
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_OUTPUT_DECODE_HPP */
\ No newline at end of file
diff --git a/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp b/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp
deleted file mode 100644
index 75d75da..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LET_MFCC_HPP
-#define KWS_ASR_WAV2LET_MFCC_HPP
-
-#include "Mfcc.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Class to provide Wav2Letter specific MFCC calculation requirements. */
- class Wav2LetterMFCC : public MFCC {
-
- public:
- static constexpr uint32_t ms_defaultSamplingFreq = 16000;
- static constexpr uint32_t ms_defaultNumFbankBins = 128;
- static constexpr uint32_t ms_defaultMelLoFreq = 0;
- static constexpr uint32_t ms_defaultMelHiFreq = 8000;
- static constexpr bool ms_defaultUseHtkMethod = false;
-
- explicit Wav2LetterMFCC(const size_t numFeats, const size_t frameLen)
- : MFCC(MfccParams(
- ms_defaultSamplingFreq, ms_defaultNumFbankBins,
- ms_defaultMelLoFreq, ms_defaultMelHiFreq,
- numFeats, frameLen, ms_defaultUseHtkMethod))
- {}
-
- Wav2LetterMFCC() = delete;
- ~Wav2LetterMFCC() = default;
-
- protected:
-
- /**
- * @brief Overrides base class implementation of this function.
- * @param[in] fftVec Vector populated with FFT magnitudes.
- * @param[in] melFilterBank 2D Vector with filter bank weights.
- * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
- * to be used for each bin.
- * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
- * to be used for each bin.
- * @param[out] melEnergies Pre-allocated vector of MEL energies to be
- * populated.
- * @return true if successful, false otherwise.
- */
- bool ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies) override;
-
- /**
- * @brief Override for the base class implementation convert mel
- * energies to logarithmic scale. The difference from
- * default behaviour is that the power is converted to dB
- * and subsequently clamped.
- * @param[in,out] melEnergies 1D vector of Mel energies.
- **/
- void ConvertToLogarithmicScale(
- std::vector<float>& melEnergies) override;
-
- /**
- * @brief Create a matrix used to calculate Discrete Cosine
- * Transform. Override for the base class' default
- * implementation as the first and last elements
- * use a different normaliser.
- * @param[in] inputLength Input length of the buffer on which
- * DCT will be performed.
- * @param[in] coefficientCount Total coefficients per input length.
- * @return 1D vector with inputLength x coefficientCount elements
- * populated with DCT coefficients.
- */
- std::vector<float> CreateDCTMatrix(
- int32_t inputLength,
- int32_t coefficientCount) override;
-
- /**
- * @brief Given the low and high Mel values, get the normaliser
- * for weights to be applied when populating the filter
- * bank. Override for the base class implementation.
- * @param[in] leftMel Low Mel frequency value.
- * @param[in] rightMel High Mel frequency value.
- * @param[in] useHTKMethod Bool to signal if HTK method is to be
- * used for calculation.
- * @return Value to use for normalising.
- */
- float GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- bool useHTKMethod) override;
-
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LET_MFCC_HPP */
diff --git a/source/use_case/kws_asr/include/Wav2LetterModel.hpp b/source/use_case/kws_asr/include/Wav2LetterModel.hpp
deleted file mode 100644
index 0e1adc5..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterModel.hpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LETTER_MODEL_HPP
-#define KWS_ASR_WAV2LETTER_MODEL_HPP
-
-#include "Model.hpp"
-
-namespace arm {
-namespace app {
-namespace asr {
- extern const int g_FrameLength;
- extern const int g_FrameStride;
- extern const float g_ScoreThreshold;
- extern const int g_ctxLen;
-} /* namespace asr */
-} /* namespace app */
-} /* namespace arm */
-
-namespace arm {
-namespace app {
-
- class Wav2LetterModel : public Model {
-
- public:
- /* Indices for the expected model - based on input and output tensor shapes */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_outputRowsIdx = 2;
- static constexpr uint32_t ms_outputColsIdx = 3;
-
- /* Model specific constants. */
- static constexpr uint32_t ms_blankTokenIdx = 28;
- static constexpr uint32_t ms_numMfccFeatures = 13;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 5;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LETTER_MODEL_HPP */
diff --git a/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp b/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp
deleted file mode 100644
index d1bc9a2..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LETTER_POSTPROCESS_HPP
-#define KWS_ASR_WAV2LETTER_POSTPROCESS_HPP
-
-#include "TensorFlowLiteMicro.hpp" /* TensorFlow headers. */
-#include "BaseProcessing.hpp"
-#include "AsrClassifier.hpp"
-#include "AsrResult.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Helper class to manage tensor post-processing for "wav2letter"
- * output.
- */
- class AsrPostProcess : public BasePostProcess {
- public:
- bool m_lastIteration = false; /* Flag to set if processing the last set of data for a clip. */
-
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Object used to get top N results from classification.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[in/out] result Vector of classification results to store decoded outputs.
- * @param[in] outputContextLen Left/right context length for output tensor.
- * @param[in] blankTokenIdx Index in the labels that the "Blank token" takes.
- * @param[in] reductionAxis The axis that the logits of each time step is on.
- **/
- AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
- const std::vector<std::string>& labels, asr::ResultVec& result,
- uint32_t outputContextLen,
- uint32_t blankTokenIdx, uint32_t reductionAxis);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate ASR result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
-
- /** @brief Gets the output inner length for post-processing. */
- static uint32_t GetOutputInnerLen(const TfLiteTensor*, uint32_t outputCtxLen);
-
- /** @brief Gets the output context length (left/right) for post-processing. */
- static uint32_t GetOutputContextLen(const Model& model, uint32_t inputCtxLen);
-
- /** @brief Gets the number of feature vectors to be computed. */
- static uint32_t GetNumFeatureVectors(const Model& model);
-
- private:
- AsrClassifier& m_classifier; /* ASR Classifier object. */
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- const std::vector<std::string>& m_labels; /* ASR Labels. */
- asr::ResultVec & m_results; /* Results vector for a single inference. */
- uint32_t m_outputContextLen; /* lengths of left/right contexts for output. */
- uint32_t m_outputInnerLen; /* Length of output inner context. */
- uint32_t m_totalLen; /* Total length of the required axis. */
- uint32_t m_countIterations; /* Current number of iterations. */
- uint32_t m_blankTokenIdx; /* Index of the labels blank token. */
- uint32_t m_reductionAxisIdx; /* Axis containing output logits for a single step. */
-
- /**
- * @brief Checks if the tensor and axis index are valid
- * inputs to the object - based on how it has been initialised.
- * @return true if valid, false otherwise.
- */
- bool IsInputValid(TfLiteTensor* tensor,
- uint32_t axisIdx) const;
-
- /**
- * @brief Gets the tensor data element size in bytes based
- * on the tensor type.
- * @return Size in bytes, 0 if not supported.
- */
- static uint32_t GetTensorElementSize(TfLiteTensor* tensor);
-
- /**
- * @brief Erases sections from the data assuming row-wise
- * arrangement along the context axis.
- * @return true if successful, false otherwise.
- */
- bool EraseSectionsRowWise(uint8_t* ptrData,
- uint32_t strideSzBytes,
- bool lastIteration);
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LETTER_POSTPROCESS_HPP */
\ No newline at end of file
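The context handling described in the member comments above reduces to a simple split along the reduction axis. The numbers below are illustrative only (they follow the typical Wav2Letter output shape for this use case) and are not taken from this change.

    /* Each output along the reduction axis is treated as:
     *     [ left context | inner region | right context ]
     * Middle iterations keep only the inner region; the first iteration also
     * keeps the left context and the last iteration the right context. */
    const uint32_t totalLen       = 148;                           /* example row count */
    const uint32_t outputCtxLen   = 49;                            /* example context length */
    const uint32_t outputInnerLen = totalLen - (2 * outputCtxLen); /* 148 - 98 = 50 */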
diff --git a/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp b/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
deleted file mode 100644
index 1224c23..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LETTER_PREPROCESS_HPP
-#define KWS_ASR_WAV2LETTER_PREPROCESS_HPP
-
-#include "Wav2LetterModel.hpp"
-#include "Wav2LetterMfcc.hpp"
-#include "AudioUtils.hpp"
-#include "DataStructures.hpp"
-#include "BaseProcessing.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /* Class to facilitate pre-processing calculation for Wav2Letter model
- * for ASR. */
- using AudioWindow = audio::SlidingWindow<const int16_t>;
-
- class AsrPreProcess : public BasePreProcess {
- public:
- /**
- * @brief Constructor.
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] numMfccFeatures Number of MFCC features per window.
- * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
- * for an inference.
- * @param[in] mfccWindowLen Number of audio elements to calculate MFCC features per window.
- * @param[in] mfccWindowStride Stride (in number of elements) for moving the MFCC window.
- */
- AsrPreProcess(TfLiteTensor* inputTensor,
- uint32_t numMfccFeatures,
- uint32_t numFeatureFrames,
- uint32_t mfccWindowLen,
- uint32_t mfccWindowStride);
-
- /**
- * @brief Calculates the features required from audio data. This
- * includes MFCC, first and second order deltas,
- * normalisation and finally, quantisation. The tensor is
- * populated with features from a given window placed along
- * in a single row.
- * @param[in] audioData Pointer to the first element of audio data.
- * @param[in] audioDataLen Number of elements in the audio data.
- * @return true if successful, false in case of error.
- */
- bool DoPreProcess(const void* audioData, size_t audioDataLen) override;
-
- protected:
- /**
- * @brief Computes the first and second order deltas for the
- * MFCC buffers - they are assumed to be populated.
- *
- * @param[in] mfcc MFCC buffers.
- * @param[out] delta1 Result of the first diff computation.
- * @param[out] delta2 Result of the second diff computation.
- * @return true if successful, false otherwise.
- */
- static bool ComputeDeltas(Array2d<float>& mfcc,
- Array2d<float>& delta1,
- Array2d<float>& delta2);
-
- /**
- * @brief Given a 2D vector of floats, rescale it to have mean of 0 and
- * standard deviation of 1.
- * @param[in,out] vec Vector of vector of floats.
- */
- static void StandardizeVecF32(Array2d<float>& vec);
-
- /**
- * @brief Standardizes all the MFCC and delta buffers to have mean 0 and std. dev 1.
- */
- void Standarize();
-
- /**
- * @brief Given the quantisation and data type limits, computes
- * the quantised values of a floating point input data.
- * @param[in] elem Element to be quantised.
- * @param[in] quantScale Scale.
- * @param[in] quantOffset Offset.
- * @param[in] minVal Numerical limit - minimum.
- * @param[in] maxVal Numerical limit - maximum.
- * @return Floating point quantised value.
- */
- static float GetQuantElem(
- float elem,
- float quantScale,
- int quantOffset,
- float minVal,
- float maxVal);
-
- /**
- * @brief Quantises the MFCC and delta buffers, and places them
- * in the output buffer. While doing so, it transposes
- * the data. Reason: Buffers in this class are arranged
- * for "time" axis to be row major. Primary reason for
- * this being the convolution speed up (as we can use
- * contiguous memory). The output, however, requires the
- * time axis to be in column major arrangement.
- * @param[in] outputBuf Pointer to the output buffer.
- * @param[in] outputBufSz Output buffer's size.
- * @param[in] quantScale Quantisation scale.
- * @param[in] quantOffset Quantisation offset.
- */
- template <typename T>
- bool Quantise(
- T* outputBuf,
- const uint32_t outputBufSz,
- const float quantScale,
- const int quantOffset)
- {
- /* Check the output size will fit everything. */
- if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {
- printf_err("Tensor size too small for features\n");
- return false;
- }
-
- /* Populate. */
- T* outputBufMfcc = outputBuf;
- T* outputBufD1 = outputBuf + this->m_numMfccFeats;
- T* outputBufD2 = outputBufD1 + this->m_numMfccFeats;
- const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* (3 vectors - 1 vector) */
-
- const float minVal = std::numeric_limits<T>::min();
- const float maxVal = std::numeric_limits<T>::max();
-
- /* Need to transpose while copying and concatenating the tensor. */
- for (uint32_t j = 0; j < this->m_numFeatureFrames; ++j) {
- for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {
- *outputBufMfcc++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_mfccBuf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- *outputBufD1++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_delta1Buf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- *outputBufD2++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_delta2Buf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- }
- outputBufMfcc += ptrIncr;
- outputBufD1 += ptrIncr;
- outputBufD2 += ptrIncr;
- }
-
- return true;
- }
-
- private:
- audio::Wav2LetterMFCC m_mfcc; /* MFCC instance. */
- TfLiteTensor* m_inputTensor; /* Model input tensor. */
-
- /* Actual buffers to be populated. */
- Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */
- Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
- Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
-
- uint32_t m_mfccWindowLen; /* Window length for MFCC. */
- uint32_t m_mfccWindowStride; /* Window stride len for MFCC. */
- uint32_t m_numMfccFeats; /* Number of MFCC features per window. */
- uint32_t m_numFeatureFrames; /* How many sets of m_numMfccFeats. */
- AudioWindow m_mfccSlidingWindow; /* Sliding window to calculate MFCCs. */
-
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LETTER_PREPROCESS_HPP */
\ No newline at end of file
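The `Quantise()` helper above relies on `GetQuantElem()`, whose definition lives in the deleted Wav2LetterPreprocess.cc and is not shown here. As a rough sketch, affine quantisation with clamping looks like the following; this is illustrative only and the shipped implementation may differ in rounding details.

    #include <algorithm>
    #include <cmath>

    /* Map a floating point feature to the quantised domain and clamp it to the
     * numeric limits of the target type. */
    static float QuantiseElem(float elem, float quantScale, int quantOffset,
                              float minVal, float maxVal)
    {
        const float val = std::round((elem / quantScale) + quantOffset);
        return std::min(std::max(val, minVal), maxVal);
    }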
diff --git a/source/use_case/kws_asr/src/AsrClassifier.cc b/source/use_case/kws_asr/src/AsrClassifier.cc
deleted file mode 100644
index 9c18b14..0000000
--- a/source/use_case/kws_asr/src/AsrClassifier.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "AsrClassifier.hpp"
-
-#include "log_macros.h"
-#include "TensorFlowLiteMicro.hpp"
-#include "Wav2LetterModel.hpp"
-
-template<typename T>
-bool arm::app::AsrClassifier::GetTopResults(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint)
-{
- const uint32_t nElems = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx];
- const uint32_t nLetters = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx];
-
- if (nLetters != labels.size()) {
- printf("Output size doesn't match the labels' size\n");
- return false;
- }
-
- /* NOTE: tensor's size verification against labels should be
- * checked by the calling/public function. */
- if (nLetters < 1) {
- return false;
- }
-
- /* Final results' container. */
- vecResults = std::vector<ClassificationResult>(nElems);
-
- T* tensorData = tflite::GetTensorData<T>(tensor);
-
- /* Get the top 1 results. */
- for (uint32_t i = 0, row = 0; i < nElems; ++i, row+=nLetters) {
- std::pair<T, uint32_t> top_1 = std::make_pair(tensorData[row], 0);
-
- for (uint32_t j = 1; j < nLetters; ++j) {
- if (top_1.first < tensorData[row + j]) {
- top_1.first = tensorData[row + j];
- top_1.second = j;
- }
- }
-
- double score = static_cast<int> (top_1.first);
- vecResults[i].m_normalisedVal = scale * (score - zeroPoint);
- vecResults[i].m_label = labels[top_1.second];
- vecResults[i].m_labelIdx = top_1.second;
- }
-
- return true;
-}
-template bool arm::app::AsrClassifier::GetTopResults<uint8_t>(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint);
-template bool arm::app::AsrClassifier::GetTopResults<int8_t>(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint);
-
-bool arm::app::AsrClassifier::GetClassificationResults(
- TfLiteTensor* outputTensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, uint32_t topNCount, bool use_softmax)
-{
- UNUSED(use_softmax);
- vecResults.clear();
-
- constexpr int minTensorDims = static_cast<int>(
- (arm::app::Wav2LetterModel::ms_outputRowsIdx > arm::app::Wav2LetterModel::ms_outputColsIdx)?
- arm::app::Wav2LetterModel::ms_outputRowsIdx : arm::app::Wav2LetterModel::ms_outputColsIdx);
-
- constexpr uint32_t outColsIdx = arm::app::Wav2LetterModel::ms_outputColsIdx;
-
- /* Sanity checks. */
- if (outputTensor == nullptr) {
- printf_err("Output vector is null pointer.\n");
- return false;
- } else if (outputTensor->dims->size < minTensorDims) {
- printf_err("Output tensor expected to be 3D (1, m, n)\n");
- return false;
- } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
- printf_err("Output vectors are smaller than %" PRIu32 "\n", topNCount);
- return false;
- } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
- printf("Output size doesn't match the labels' size\n");
- return false;
- }
-
- if (topNCount != 1) {
- warn("TopNCount value ignored in this implementation\n");
- }
-
- /* To return the floating point values, we need quantization parameters. */
- QuantParams quantParams = GetTensorQuantParams(outputTensor);
-
- bool resultState;
-
- switch (outputTensor->type) {
- case kTfLiteUInt8:
- resultState = this->GetTopResults<uint8_t>(
- outputTensor, vecResults,
- labels, quantParams.scale,
- quantParams.offset);
- break;
- case kTfLiteInt8:
- resultState = this->GetTopResults<int8_t>(
- outputTensor, vecResults,
- labels, quantParams.scale,
- quantParams.offset);
- break;
- default:
- printf_err("Tensor type %s not supported by classifier\n",
- TfLiteTypeGetName(outputTensor->type));
- return false;
- }
-
- if (!resultState) {
- printf_err("Failed to get sorted set\n");
- return false;
- }
-
- return true;
-}
\ No newline at end of file
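To make the dequantisation in `GetTopResults()` concrete, each top-1 score is mapped back to a floating point value as `scale * (score - zeroPoint)`. The numbers below are arbitrary examples.

    /* Example values only: an int8 logit of 54, scale 1/256 and zero point -128
     * give a normalised score of 0.00390625 * (54 - (-128)) = 0.7109375. */
    const double scale      = 0.00390625;
    const double zeroPoint  = -128.0;
    const int8_t rawTop1    = 54;
    const double normalised = scale * (static_cast<double>(rawTop1) - zeroPoint);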
diff --git a/source/use_case/kws_asr/src/KwsProcessing.cc b/source/use_case/kws_asr/src/KwsProcessing.cc
deleted file mode 100644
index 328709d..0000000
--- a/source/use_case/kws_asr/src/KwsProcessing.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "KwsProcessing.hpp"
-#include "ImageUtils.hpp"
-#include "log_macros.h"
-#include "MicroNetKwsModel.hpp"
-
-namespace arm {
-namespace app {
-
- KwsPreProcess::KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numMfccFrames,
- int mfccFrameLength, int mfccFrameStride
- ):
- m_inputTensor{inputTensor},
- m_mfccFrameLength{mfccFrameLength},
- m_mfccFrameStride{mfccFrameStride},
- m_numMfccFrames{numMfccFrames},
- m_mfcc{audio::MicroNetKwsMFCC(numFeatures, mfccFrameLength)}
- {
- this->m_mfcc.Init();
-
- /* Deduce the data length required for 1 inference from the network parameters. */
- this->m_audioDataWindowSize = this->m_numMfccFrames * this->m_mfccFrameStride +
- (this->m_mfccFrameLength - this->m_mfccFrameStride);
-
- /* Creating an MFCC feature sliding window for the data required for 1 inference. */
- this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(nullptr, this->m_audioDataWindowSize,
- this->m_mfccFrameLength, this->m_mfccFrameStride);
-
- /* For longer audio clips we choose to move by half the audio window size
- * => for a 1 second window size there is an overlap of 0.5 seconds. */
- this->m_audioDataStride = this->m_audioDataWindowSize / 2;
-
- /* To have the previously calculated features re-usable, stride must be multiple
- * of MFCC features window stride. Reduce stride through audio if needed. */
- if (0 != this->m_audioDataStride % this->m_mfccFrameStride) {
- this->m_audioDataStride -= this->m_audioDataStride % this->m_mfccFrameStride;
- }
-
- this->m_numMfccVectorsInAudioStride = this->m_audioDataStride / this->m_mfccFrameStride;
-
- /* Calculate number of the feature vectors in the window overlap region.
- * These feature vectors will be reused.*/
- this->m_numReusedMfccVectors = this->m_mfccSlidingWindow.TotalStrides() + 1
- - this->m_numMfccVectorsInAudioStride;
-
- /* Construct feature calculation function. */
- this->m_mfccFeatureCalculator = GetFeatureCalculator(this->m_mfcc, this->m_inputTensor,
- this->m_numReusedMfccVectors);
-
- if (!this->m_mfccFeatureCalculator) {
- printf_err("Feature calculator not initialized.");
- }
- }
-
- bool KwsPreProcess::DoPreProcess(const void* data, size_t inputSize)
- {
- UNUSED(inputSize);
- if (data == nullptr) {
- printf_err("Data pointer is null");
- }
-
- /* Set the features sliding window to the new address. */
- auto input = static_cast<const int16_t*>(data);
- this->m_mfccSlidingWindow.Reset(input);
-
- /* Cache is only usable if we have more than 1 inference in an audio clip. */
- bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedMfccVectors > 0;
-
- /* Use a sliding window to calculate MFCC features frame by frame. */
- while (this->m_mfccSlidingWindow.HasNext()) {
- const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
-
- std::vector<int16_t> mfccFrameAudioData = std::vector<int16_t>(mfccWindow,
- mfccWindow + this->m_mfccFrameLength);
-
- /* Compute features for this window and write them to input tensor. */
- this->m_mfccFeatureCalculator(mfccFrameAudioData, this->m_mfccSlidingWindow.Index(),
- useCache, this->m_numMfccVectorsInAudioStride);
- }
-
- debug("Input tensor populated \n");
-
- return true;
- }
-
- /**
- * @brief Generic feature calculator factory.
- *
- * Returns lambda function to compute features using features cache.
- * Real features math is done by a lambda function provided as a parameter.
- * Features are written to input tensor memory.
- *
- * @tparam T Feature vector type.
- * @param[in] inputTensor Model input tensor pointer.
- * @param[in] cacheSize Number of feature vectors to cache. Defined by the sliding window overlap.
- * @param[in] compute Features calculator function.
- * @return Lambda function to compute features.
- */
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
- KwsPreProcess::FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute)
- {
- /* Feature cache to be captured by lambda function. */
- static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
-
- return [=](std::vector<int16_t>& audioDataWindow,
- size_t index,
- bool useCache,
- size_t featuresOverlapIndex)
- {
- T* tensorData = tflite::GetTensorData<T>(inputTensor);
- std::vector<T> features;
-
- /* Reuse features from cache if cache is ready and sliding windows overlap.
- * Overlap is in the beginning of sliding window with a size of a feature cache. */
- if (useCache && index < featureCache.size()) {
- features = std::move(featureCache[index]);
- } else {
- features = std::move(compute(audioDataWindow));
- }
- auto size = features.size();
- auto sizeBytes = sizeof(T) * size;
- std::memcpy(tensorData + (index * size), features.data(), sizeBytes);
-
- /* Start renewing cache as soon iteration goes out of the windows overlap. */
- if (index >= featuresOverlapIndex) {
- featureCache[index - featuresOverlapIndex] = std::move(features);
- }
- };
- }
-
- template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
- KwsPreProcess::FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
-
- template std::function<void(std::vector<int16_t>&, size_t, bool, size_t)>
- KwsPreProcess::FeatureCalc<float>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<float>(std::vector<int16_t>&)> compute);
-
-
- std::function<void (std::vector<int16_t>&, int, bool, size_t)>
- KwsPreProcess::GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
- {
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
-
- TfLiteQuantization quant = inputTensor->quantization;
-
- if (kTfLiteAffineQuantization == quant.type) {
- auto *quantParams = (TfLiteAffineQuantization *) quant.params;
- const float quantScale = quantParams->scale->data[0];
- const int quantOffset = quantParams->zero_point->data[0];
-
- switch (inputTensor->type) {
- case kTfLiteInt8: {
- mfccFeatureCalc = this->FeatureCalc<int8_t>(inputTensor,
- cacheSize,
- [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
- return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
- quantScale,
- quantOffset);
- }
- );
- break;
- }
- default:
- printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
- }
- } else {
- mfccFeatureCalc = this->FeatureCalc<float>(inputTensor, cacheSize,
- [&mfcc](std::vector<int16_t>& audioDataWindow) {
- return mfcc.MfccCompute(audioDataWindow); }
- );
- }
- return mfccFeatureCalc;
- }
-
- KwsPostProcess::KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results)
- :m_outputTensor{outputTensor},
- m_kwsClassifier{classifier},
- m_labels{labels},
- m_results{results}
- {}
-
- bool KwsPostProcess::DoPostProcess()
- {
- return this->m_kwsClassifier.GetClassificationResults(
- this->m_outputTensor, this->m_results,
- this->m_labels, 1, true);
- }
-
-} /* namespace app */
-} /* namespace arm */
\ No newline at end of file
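The constructor arithmetic above is easier to follow with numbers. The values below assume the usual MicroNet KWS defaults for this use case (frame length 640, frame stride 320, 49 MFCC frames at 16 kHz) and are meant as an illustration, not as part of this change.

    const size_t numMfccFrames = 49;
    const int    frameLength   = 640;
    const int    frameStride   = 320;

    /* Audio needed for one inference: 49 * 320 + (640 - 320) = 16000 samples (1 s). */
    const size_t windowSize = numMfccFrames * frameStride + (frameLength - frameStride);

    /* Stride by half a window; 8000 is already a multiple of 320, so no adjustment. */
    size_t audioStride = windowSize / 2;
    audioStride       -= audioStride % frameStride;

    const size_t newVectorsPerStride = audioStride / frameStride;           /* 25 fresh MFCC vectors */
    const size_t reusedVectors       = numMfccFrames - newVectorsPerStride; /* 24 cached vectors reused */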
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index f1d97a0..2365264 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -23,7 +23,24 @@
#include "Wav2LetterModel.hpp" /* ASR model class for running inference. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+
+ namespace asr {
+ extern uint8_t* GetModelPointer();
+ extern size_t GetModelLen();
+ }
+
+ namespace kws {
+ extern uint8_t* GetModelPointer();
+ extern size_t GetModelLen();
+ }
+} /* namespace app */
+} /* namespace arm */
using KwsClassifier = arm::app::Classifier;
@@ -60,14 +77,29 @@ void main_loop()
arm::app::Wav2LetterModel asrModel;
/* Load the models. */
- if (!kwsModel.Init()) {
+ if (!kwsModel.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ arm::app::kws::GetModelPointer(),
+ arm::app::kws::GetModelLen())) {
printf_err("Failed to initialise KWS model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (kwsModel.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the KWS model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Initialise the asr model using the same allocator from KWS
* to re-use the tensor arena. */
- if (!asrModel.Init(kwsModel.GetAllocator())) {
+ if (!asrModel.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ arm::app::asr::GetModelPointer(),
+ arm::app::asr::GetModelLen(),
+ kwsModel.GetAllocator())) {
printf_err("Failed to initialise ASR model\n");
return;
} else if (!VerifyTensorDimensions(asrModel)) {
@@ -75,6 +107,14 @@ void main_loop()
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (asrModel.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the ASR model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
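Pulled out of the hunks above, the new initialisation pattern is: one statically allocated tensor arena, both models initialised against it, and the ASR model re-using the KWS model's allocator. A condensed sketch follows, with error handling and the non-NPU operator checks trimmed for brevity.

    arm::app::MicroNetKwsModel kwsModel;
    arm::app::Wav2LetterModel  asrModel;

    bool ok = kwsModel.Init(arm::app::tensorArena, sizeof(arm::app::tensorArena),
                            arm::app::kws::GetModelPointer(),
                            arm::app::kws::GetModelLen());

    /* Share the allocator (and therefore the arena plan) with the ASR model. */
    ok = ok && asrModel.Init(arm::app::tensorArena, sizeof(arm::app::tensorArena),
                             arm::app::asr::GetModelPointer(),
                             arm::app::asr::GetModelLen(),
                             kwsModel.GetAllocator());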
diff --git a/source/use_case/kws_asr/src/MicroNetKwsModel.cc b/source/use_case/kws_asr/src/MicroNetKwsModel.cc
deleted file mode 100644
index 663faa0..0000000
--- a/source/use_case/kws_asr/src/MicroNetKwsModel.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "MicroNetKwsModel.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-namespace kws {
- extern uint8_t* GetModelPointer();
- extern size_t GetModelLen();
-} /* namespace kws */
-} /* namespace app */
-} /* namespace arm */
-
-const tflite::MicroOpResolver& arm::app::MicroNetKwsModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::MicroNetKwsModel::EnlistOperations()
-{
- this->m_opResolver.AddAveragePool2D();
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddDepthwiseConv2D();
- this->m_opResolver.AddFullyConnected();
- this->m_opResolver.AddRelu();
- this->m_opResolver.AddReshape();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-const uint8_t* arm::app::MicroNetKwsModel::ModelPointer()
-{
- return arm::app::kws::GetModelPointer();
-}
-
-size_t arm::app::MicroNetKwsModel::ModelSize()
-{
- return arm::app::kws::GetModelLen();
-}
\ No newline at end of file
diff --git a/source/use_case/kws_asr/src/OutputDecode.cc b/source/use_case/kws_asr/src/OutputDecode.cc
deleted file mode 100644
index 41fbe07..0000000
--- a/source/use_case/kws_asr/src/OutputDecode.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "OutputDecode.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-namespace asr {
-
- std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults)
- {
- std::string CleanOutputBuffer;
-
- for (size_t i = 0; i < vecResults.size(); ++i) /* For all elements in vector. */
- {
- while (i+1 < vecResults.size() &&
- vecResults[i].m_label == vecResults[i+1].m_label) /* While the current element is equal to the next, ignore it and move on. */
- {
- ++i;
- }
- if (vecResults[i].m_label != "$") /* $ is a character used to represent unknown and double characters so should not be in output. */
- {
- CleanOutputBuffer += vecResults[i].m_label; /* If the element is different to the next, it will be appended to CleanOutputBuffer. */
- }
- }
-
- return CleanOutputBuffer; /* Return string type containing clean output. */
- }
-
-} /* namespace asr */
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
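
The DecodeOutput() routine removed above is a CTC-style cleanup: runs of identical labels are collapsed to a single occurrence and the "$" label (blank/unknown) is dropped, so raw per-frame labels such as h h $ e l l $ l o become "hello". A self-contained sketch of the same collapse-and-strip logic, with ClassificationResult simplified to a plain string for illustration:

    #include <string>
    #include <vector>

    /* Sketch only: mirrors the deleted DecodeOutput() using plain strings. */
    std::string DecodeLabels(const std::vector<std::string>& labels)
    {
        std::string cleanOutput;
        for (size_t i = 0; i < labels.size(); ++i) {
            /* Skip to the last element of a run of identical labels. */
            while (i + 1 < labels.size() && labels[i] == labels[i + 1]) {
                ++i;
            }
            if (labels[i] != "$") { /* "$" marks blank/unknown; never emitted. */
                cleanOutput += labels[i];
            }
        }
        return cleanOutput;
    }
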
diff --git a/source/use_case/kws_asr/src/UseCaseHandler.cc b/source/use_case/kws_asr/src/UseCaseHandler.cc
index 01aefae..9427ae0 100644
--- a/source/use_case/kws_asr/src/UseCaseHandler.cc
+++ b/source/use_case/kws_asr/src/UseCaseHandler.cc
@@ -25,6 +25,7 @@
#include "MicroNetKwsMfcc.hpp"
#include "Classifier.hpp"
#include "KwsResult.hpp"
+#include "Wav2LetterModel.hpp"
#include "Wav2LetterMfcc.hpp"
#include "Wav2LetterPreprocess.hpp"
#include "Wav2LetterPostprocess.hpp"
@@ -470,4 +471,4 @@ namespace app {
}
} /* namespace app */
-} /* namespace arm */
\ No newline at end of file
+} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/Wav2LetterMfcc.cc b/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
deleted file mode 100644
index f2c50f3..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterMfcc.hpp"
-
-#include "PlatformMath.hpp"
-#include "log_macros.h"
-
-#include <cfloat>
-
-namespace arm {
-namespace app {
-namespace audio {
-
- bool Wav2LetterMFCC::ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies)
- {
- const size_t numBanks = melEnergies.size();
-
- if (numBanks != filterBankFilterFirst.size() ||
- numBanks != filterBankFilterLast.size()) {
- printf_err("unexpected filter bank lengths\n");
- return false;
- }
-
- for (size_t bin = 0; bin < numBanks; ++bin) {
- auto filterBankIter = melFilterBank[bin].begin();
- auto end = melFilterBank[bin].end();
- /* Avoid log of zero at later stages, same value used in librosa.
- * The number was used during our default wav2letter model training. */
- float melEnergy = 1e-10;
- const uint32_t firstIndex = filterBankFilterFirst[bin];
- const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
-
- for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
- melEnergy += (*filterBankIter++ * fftVec[i]);
- }
-
- melEnergies[bin] = melEnergy;
- }
-
- return true;
- }
-
- void Wav2LetterMFCC::ConvertToLogarithmicScale(
- std::vector<float>& melEnergies)
- {
- float maxMelEnergy = -FLT_MAX;
-
- /* Container for natural logarithms of mel energies. */
- std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
-
- /* Because we are taking natural logs, we need to multiply by log10(e).
- * Also, for wav2letter model, we scale our log10 values by 10. */
- constexpr float multiplier = 10.0 * /* Default scalar. */
- 0.4342944819032518; /* log10f(std::exp(1.0))*/
-
- /* Take log of the whole vector. */
- math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
-
- /* Scale the log values and get the max. */
- for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
- iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
-
- *iterM = *iterL * multiplier;
-
- /* Save the max mel energy. */
- if (*iterM > maxMelEnergy) {
- maxMelEnergy = *iterM;
- }
- }
-
- /* Clamp the mel energies. */
- constexpr float maxDb = 80.0;
- const float clampLevelLowdB = maxMelEnergy - maxDb;
- for (float & melEnergie : melEnergies) {
- melEnergie = std::max(melEnergie, clampLevelLowdB);
- }
- }
-
- std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
- const int32_t inputLength,
- const int32_t coefficientCount)
- {
- std::vector<float> dctMatix(inputLength * coefficientCount);
-
- /* Orthonormal normalization. */
- const float normalizerK0 = 2 * math::MathUtils::SqrtF32(1.0f /
- static_cast<float>(4*inputLength));
- const float normalizer = 2 * math::MathUtils::SqrtF32(1.0f /
- static_cast<float>(2*inputLength));
-
- const float angleIncr = M_PI/inputLength;
- float angle = angleIncr; /* We start using it at k = 1 loop. */
-
- /* First row of DCT will use normalizer K0 */
- for (int32_t n = 0; n < inputLength; ++n) {
- dctMatix[n] = normalizerK0 /* cos(0) = 1 */;
- }
-
- /* Second row (index = 1) onwards, we use standard normalizer. */
- for (int32_t k = 1, m = inputLength; k < coefficientCount; ++k, m += inputLength) {
- for (int32_t n = 0; n < inputLength; ++n) {
- dctMatix[m+n] = normalizer *
- math::MathUtils::CosineF32((n + 0.5f) * angle);
- }
- angle += angleIncr;
- }
- return dctMatix;
- }
-
- float Wav2LetterMFCC::GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- const bool useHTKMethod)
- {
- /* Slaney normalization for mel weights. */
- return (2.0f / (MFCC::InverseMelScale(rightMel, useHTKMethod) -
- MFCC::InverseMelScale(leftMel, useHTKMethod)));
- }
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
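
Two constants in the file removed above are worth unpacking: ConvertToLogarithmicScale() multiplies by 10 * 0.4342944819032518, i.e. 10·log10(e), to turn natural-log mel energies into 10·log10(x) values, and then clamps everything to within 80 dB of the maximum. A hedged sketch of that conversion using only the standard library (the project code goes through the vectorised MathUtils helpers instead):

    #include <algorithm>
    #include <cfloat>
    #include <cmath>
    #include <vector>

    /* Sketch only: 10*log10(x) == (10 * log10(e)) * ln(x). */
    void ToLogMelSketch(std::vector<float>& melEnergies)
    {
        constexpr float multiplier = 10.0f * 0.4342944819032518f; /* 10 * log10(e) */
        float maxEnergy = -FLT_MAX;

        for (float& energy : melEnergies) {
            energy = multiplier * std::log(energy); /* Energies are >= 1e-10, so log is safe. */
            maxEnergy = std::max(maxEnergy, energy);
        }

        /* Clamp the dynamic range to the top 80 dB, as the deleted code does. */
        const float floorDb = maxEnergy - 80.0f;
        for (float& energy : melEnergies) {
            energy = std::max(energy, floorDb);
        }
    }
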
diff --git a/source/use_case/kws_asr/src/Wav2LetterModel.cc b/source/use_case/kws_asr/src/Wav2LetterModel.cc
deleted file mode 100644
index 52bd23a..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterModel.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterModel.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-namespace asr {
- extern uint8_t* GetModelPointer();
- extern size_t GetModelLen();
-}
-} /* namespace app */
-} /* namespace arm */
-
-const tflite::MicroOpResolver& arm::app::Wav2LetterModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::Wav2LetterModel::EnlistOperations()
-{
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddLeakyRelu();
- this->m_opResolver.AddSoftmax();
- this->m_opResolver.AddReshape();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-const uint8_t* arm::app::Wav2LetterModel::ModelPointer()
-{
- return arm::app::asr::GetModelPointer();
-}
-
-size_t arm::app::Wav2LetterModel::ModelSize()
-{
- return arm::app::asr::GetModelLen();
-}
\ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
deleted file mode 100644
index 42f434e..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterPostprocess.hpp"
-
-#include "Wav2LetterModel.hpp"
-#include "log_macros.h"
-
-#include <cmath>
-
-namespace arm {
-namespace app {
-
- AsrPostProcess::AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
- const std::vector<std::string>& labels, std::vector<ClassificationResult>& results,
- const uint32_t outputContextLen,
- const uint32_t blankTokenIdx, const uint32_t reductionAxisIdx
- ):
- m_classifier(classifier),
- m_outputTensor(outputTensor),
- m_labels{labels},
- m_results(results),
- m_outputContextLen(outputContextLen),
- m_countIterations(0),
- m_blankTokenIdx(blankTokenIdx),
- m_reductionAxisIdx(reductionAxisIdx)
- {
- this->m_outputInnerLen = AsrPostProcess::GetOutputInnerLen(this->m_outputTensor, this->m_outputContextLen);
- this->m_totalLen = (2 * this->m_outputContextLen + this->m_outputInnerLen);
- }
-
- bool AsrPostProcess::DoPostProcess()
- {
- /* Basic checks. */
- if (!this->IsInputValid(this->m_outputTensor, this->m_reductionAxisIdx)) {
- return false;
- }
-
- /* Irrespective of tensor type, we use unsigned "byte" */
- auto* ptrData = tflite::GetTensorData<uint8_t>(this->m_outputTensor);
- const uint32_t elemSz = AsrPostProcess::GetTensorElementSize(this->m_outputTensor);
-
- /* Other sanity checks. */
- if (0 == elemSz) {
- printf_err("Tensor type not supported for post processing\n");
- return false;
- } else if (elemSz * this->m_totalLen > this->m_outputTensor->bytes) {
- printf_err("Insufficient number of tensor bytes\n");
- return false;
- }
-
- /* Which axis do we need to process? */
- switch (this->m_reductionAxisIdx) {
- case Wav2LetterModel::ms_outputRowsIdx:
- this->EraseSectionsRowWise(
- ptrData, elemSz * this->m_outputTensor->dims->data[Wav2LetterModel::ms_outputColsIdx],
- this->m_lastIteration);
- break;
- default:
- printf_err("Unsupported axis index: %" PRIu32 "\n", this->m_reductionAxisIdx);
- return false;
- }
- this->m_classifier.GetClassificationResults(this->m_outputTensor,
- this->m_results, this->m_labels, 1);
-
- return true;
- }
-
- bool AsrPostProcess::IsInputValid(TfLiteTensor* tensor, const uint32_t axisIdx) const
- {
- if (nullptr == tensor) {
- return false;
- }
-
- if (static_cast<int>(axisIdx) >= tensor->dims->size) {
- printf_err("Invalid axis index: %" PRIu32 "; Max: %d\n",
- axisIdx, tensor->dims->size);
- return false;
- }
-
- if (static_cast<int>(this->m_totalLen) !=
- tensor->dims->data[axisIdx]) {
- printf_err("Unexpected tensor dimension for axis %d, got %d, \n",
- axisIdx, tensor->dims->data[axisIdx]);
- return false;
- }
-
- return true;
- }
-
- uint32_t AsrPostProcess::GetTensorElementSize(TfLiteTensor* tensor)
- {
- switch(tensor->type) {
- case kTfLiteUInt8:
- case kTfLiteInt8:
- return 1;
- case kTfLiteInt16:
- return 2;
- case kTfLiteInt32:
- case kTfLiteFloat32:
- return 4;
- default:
- printf_err("Unsupported tensor type %s\n",
- TfLiteTypeGetName(tensor->type));
- }
-
- return 0;
- }
-
- bool AsrPostProcess::EraseSectionsRowWise(
- uint8_t* ptrData,
- const uint32_t strideSzBytes,
- const bool lastIteration)
- {
- /* In this case, the "zero-ing" is quite simple as the region
- * to be zeroed sits in contiguous memory (row-major). */
- const uint32_t eraseLen = strideSzBytes * this->m_outputContextLen;
-
- /* Erase left context? */
- if (this->m_countIterations > 0) {
- /* Set output of each classification window to the blank token. */
- std::memset(ptrData, 0, eraseLen);
- for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
- ptrData[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
- }
- }
-
- /* Erase right context? */
- if (false == lastIteration) {
- uint8_t* rightCtxPtr = ptrData + (strideSzBytes * (this->m_outputContextLen + this->m_outputInnerLen));
- /* Set output of each classification window to the blank token. */
- std::memset(rightCtxPtr, 0, eraseLen);
- for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
- rightCtxPtr[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
- }
- }
-
- if (lastIteration) {
- this->m_countIterations = 0;
- } else {
- ++this->m_countIterations;
- }
-
- return true;
- }
-
- uint32_t AsrPostProcess::GetNumFeatureVectors(const Model& model)
- {
- TfLiteTensor* inputTensor = model.GetInputTensor(0);
- const int inputRows = std::max(inputTensor->dims->data[Wav2LetterModel::ms_inputRowsIdx], 0);
- if (inputRows == 0) {
- printf_err("Error getting number of input rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_inputRowsIdx);
- }
- return inputRows;
- }
-
- uint32_t AsrPostProcess::GetOutputInnerLen(const TfLiteTensor* outputTensor, const uint32_t outputCtxLen)
- {
- const uint32_t outputRows = std::max(outputTensor->dims->data[Wav2LetterModel::ms_outputRowsIdx], 0);
- if (outputRows == 0) {
- printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_outputRowsIdx);
- }
-
- /* Watching for underflow. */
- int innerLen = (outputRows - (2 * outputCtxLen));
-
- return std::max(innerLen, 0);
- }
-
- uint32_t AsrPostProcess::GetOutputContextLen(const Model& model, const uint32_t inputCtxLen)
- {
- const uint32_t inputRows = AsrPostProcess::GetNumFeatureVectors(model);
- const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
- constexpr uint32_t ms_outputRowsIdx = Wav2LetterModel::ms_outputRowsIdx;
-
- /* Check to make sure that the input tensor supports the above
- * context and inner lengths. */
- if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
- printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
- inputCtxLen);
- return 0;
- }
-
- TfLiteTensor* outputTensor = model.GetOutputTensor(0);
- const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
- if (outputRows == 0) {
- printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_outputRowsIdx);
- return 0;
- }
-
- const float inOutRowRatio = static_cast<float>(inputRows) /
- static_cast<float>(outputRows);
-
- return std::round(static_cast<float>(inputCtxLen) / inOutRowRatio);
- }
-
-} /* namespace app */
-} /* namespace arm */
\ No newline at end of file
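
The core of the post-processing removed above is EraseSectionsRowWise(): because Wav2Letter runs on overlapping audio windows, the left and right context rows of each output are zeroed and forced to the CTC blank token so that only a window's inner section contributes characters. A minimal sketch of that row-wise erase (function name and parameters here are illustrative):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    /* Sketch only: zero `numRows` contiguous output rows and mark each row as
     * the CTC blank token, as done for a window's left/right context. */
    void EraseContextRows(uint8_t* rowStart, size_t rowSizeBytes,
                          size_t numRows, size_t blankTokenIdx)
    {
        std::memset(rowStart, 0, rowSizeBytes * numRows);
        for (size_t row = 0; row < numRows; ++row) {
            rowStart[row * rowSizeBytes + blankTokenIdx] = 1; /* "Blank" wins argmax. */
        }
    }

In the deleted logic the left context is erased on every iteration after the first, and the right context on every iteration except the last.
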
diff --git a/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
deleted file mode 100644
index 92b0631..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterPreprocess.hpp"
-
-#include "PlatformMath.hpp"
-#include "TensorFlowLiteMicro.hpp"
-
-#include <algorithm>
-#include <cmath>
-
-namespace arm {
-namespace app {
-
- AsrPreProcess::AsrPreProcess(TfLiteTensor* inputTensor, const uint32_t numMfccFeatures,
- const uint32_t numFeatureFrames, const uint32_t mfccWindowLen,
- const uint32_t mfccWindowStride
- ):
- m_mfcc(numMfccFeatures, mfccWindowLen),
- m_inputTensor(inputTensor),
- m_mfccBuf(numMfccFeatures, numFeatureFrames),
- m_delta1Buf(numMfccFeatures, numFeatureFrames),
- m_delta2Buf(numMfccFeatures, numFeatureFrames),
- m_mfccWindowLen(mfccWindowLen),
- m_mfccWindowStride(mfccWindowStride),
- m_numMfccFeats(numMfccFeatures),
- m_numFeatureFrames(numFeatureFrames)
- {
- if (numMfccFeatures > 0 && mfccWindowLen > 0) {
- this->m_mfcc.Init();
- }
- }
-
- bool AsrPreProcess::DoPreProcess(const void* audioData, const size_t audioDataLen)
- {
- this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(
- static_cast<const int16_t*>(audioData), audioDataLen,
- this->m_mfccWindowLen, this->m_mfccWindowStride);
-
- uint32_t mfccBufIdx = 0;
-
- std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
- std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
- std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
-
- /* While we can slide over the audio. */
- while (this->m_mfccSlidingWindow.HasNext()) {
- const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
- auto mfccAudioData = std::vector<int16_t>(
- mfccWindow,
- mfccWindow + this->m_mfccWindowLen);
- auto mfcc = this->m_mfcc.MfccCompute(mfccAudioData);
- for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i) {
- this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
- }
- ++mfccBufIdx;
- }
-
- /* Pad MFCC if needed by adding MFCC for zeros. */
- if (mfccBufIdx != this->m_numFeatureFrames) {
- std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->m_mfccWindowLen, 0);
- std::vector<float> mfccZeros = this->m_mfcc.MfccCompute(zerosWindow);
-
- while (mfccBufIdx != this->m_numFeatureFrames) {
- memcpy(&this->m_mfccBuf(0, mfccBufIdx),
- mfccZeros.data(), sizeof(float) * m_numMfccFeats);
- ++mfccBufIdx;
- }
- }
-
- /* Compute first and second order deltas from MFCCs. */
- AsrPreProcess::ComputeDeltas(this->m_mfccBuf, this->m_delta1Buf, this->m_delta2Buf);
-
- /* Standardize calculated features. */
- this->Standarize();
-
- /* Quantise. */
- QuantParams quantParams = GetTensorQuantParams(this->m_inputTensor);
-
- if (0 == quantParams.scale) {
- printf_err("Quantisation scale can't be 0\n");
- return false;
- }
-
- switch(this->m_inputTensor->type) {
- case kTfLiteUInt8:
- return this->Quantise<uint8_t>(
- tflite::GetTensorData<uint8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
- quantParams.scale, quantParams.offset);
- case kTfLiteInt8:
- return this->Quantise<int8_t>(
- tflite::GetTensorData<int8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
- quantParams.scale, quantParams.offset);
- default:
- printf_err("Unsupported tensor type %s\n",
- TfLiteTypeGetName(this->m_inputTensor->type));
- }
-
- return false;
- }
-
- bool AsrPreProcess::ComputeDeltas(Array2d<float>& mfcc,
- Array2d<float>& delta1,
- Array2d<float>& delta2)
- {
- const std::vector <float> delta1Coeffs =
- {6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
- 1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
- -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
-
- const std::vector <float> delta2Coeffs =
- {0.06060606, 0.01515152, -0.01731602,
- -0.03679654, -0.04329004, -0.03679654,
- -0.01731602, 0.01515152, 0.06060606};
-
- if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
- mfcc.size(0) == 0 || mfcc.size(1) == 0) {
- return false;
- }
-
- /* Get the middle index; coeff vec len should always be odd. */
- const size_t coeffLen = delta1Coeffs.size();
- const size_t fMidIdx = (coeffLen - 1)/2;
- const size_t numFeatures = mfcc.size(0);
- const size_t numFeatVectors = mfcc.size(1);
-
- /* Iterate through features in MFCC vector. */
- for (size_t i = 0; i < numFeatures; ++i) {
- /* For each feature, iterate through time (t) samples representing feature evolution and
- * calculate d/dt and d^2/dt^2, using 1D convolution with differential kernels.
- * Convolution padding = valid, result size is `time length - kernel length + 1`.
- * The result is padded with 0 from both sides to match the size of initial time samples data.
- *
- * For the small filter, conv1D implementation as a simple loop is efficient enough.
- * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
- */
-
- for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) {
- float d1 = 0;
- float d2 = 0;
- const size_t mfccStIdx = j - fMidIdx;
-
- for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
-
- d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
- d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
- }
-
- delta1(i,j) = d1;
- delta2(i,j) = d2;
- }
- }
-
- return true;
- }
-
- void AsrPreProcess::StandardizeVecF32(Array2d<float>& vec)
- {
- auto mean = math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
- auto stddev = math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
-
- debug("Mean: %f, Stddev: %f\n", mean, stddev);
- if (stddev == 0) {
- std::fill(vec.begin(), vec.end(), 0);
- } else {
- const float stddevInv = 1.f/stddev;
- const float normalisedMean = mean/stddev;
-
- auto NormalisingFunction = [=](float& value) {
- value = value * stddevInv - normalisedMean;
- };
- std::for_each(vec.begin(), vec.end(), NormalisingFunction);
- }
- }
-
- void AsrPreProcess::Standarize()
- {
- AsrPreProcess::StandardizeVecF32(this->m_mfccBuf);
- AsrPreProcess::StandardizeVecF32(this->m_delta1Buf);
- AsrPreProcess::StandardizeVecF32(this->m_delta2Buf);
- }
-
- float AsrPreProcess::GetQuantElem(
- const float elem,
- const float quantScale,
- const int quantOffset,
- const float minVal,
- const float maxVal)
- {
- float val = std::round((elem/quantScale) + quantOffset);
- return std::min<float>(std::max<float>(val, minVal), maxVal);
- }
-
-} /* namespace app */
-} /* namespace arm */
\ No newline at end of file
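
The pre-processing removed above ends by standardising each feature buffer (MFCCs plus their first and second deltas) to zero mean and unit variance, then quantising into the input tensor with round(x/scale + offset) clamped to the integer range. A compact sketch of that final stage for an int8 input tensor, with illustrative names and a plain vector in place of the Array2d buffers:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <limits>
    #include <vector>

    /* Sketch only: zero-mean/unit-variance standardisation followed by int8
     * quantisation, mirroring the deleted Standarize()/Quantise() steps. */
    std::vector<int8_t> StandardiseAndQuantise(const std::vector<float>& features,
                                               float scale, int offset)
    {
        std::vector<int8_t> quantised;
        if (features.empty() || scale == 0.f) {
            return quantised;
        }

        float mean = 0.f;
        for (float v : features) { mean += v; }
        mean /= static_cast<float>(features.size());

        float var = 0.f;
        for (float v : features) { var += (v - mean) * (v - mean); }
        const float stddev = std::sqrt(var / static_cast<float>(features.size()));

        quantised.reserve(features.size());
        for (float v : features) {
            const float standardised = (stddev == 0.f) ? 0.f : (v - mean) / stddev;
            const float q = std::round(standardised / scale + static_cast<float>(offset));
            quantised.push_back(static_cast<int8_t>(std::min<float>(
                std::max<float>(q, std::numeric_limits<int8_t>::min()),
                std::numeric_limits<int8_t>::max())));
        }
        return quantised;
    }
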
diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake
index 40df4d7..59ef450 100644
--- a/source/use_case/kws_asr/usecase.cmake
+++ b/source/use_case/kws_asr/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the APIs to use for this use case
+list(APPEND ${use_case}_API_LIST "kws" "asr")
USER_OPTION(${use_case}_FILE_PATH "Directory with WAV files, or path to a single WAV file, to use in the evaluation application."
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
@@ -145,4 +147,4 @@ generate_audio_code(${${use_case}_FILE_PATH} ${SRC_GEN_DIR} ${INC_GEN_DIR}
${${use_case}_AUDIO_OFFSET}
${${use_case}_AUDIO_DURATION}
${${use_case}_AUDIO_RES_TYPE}
- ${${use_case}_AUDIO_MIN_SAMPLES})
\ No newline at end of file
+ ${${use_case}_AUDIO_MIN_SAMPLES})