summaryrefslogtreecommitdiff
path: root/source/use_case
diff options
context:
space:
mode:
Diffstat (limited to 'source/use_case')
-rw-r--r--source/use_case/ad/include/AdMelSpectrogram.hpp97
-rw-r--r--source/use_case/ad/include/AdModel.hpp53
-rw-r--r--source/use_case/ad/include/AdPostProcessing.hpp50
-rw-r--r--source/use_case/ad/include/MelSpectrogram.hpp233
-rw-r--r--source/use_case/ad/include/UseCaseHandler.hpp33
-rw-r--r--source/use_case/ad/src/AdMelSpectrogram.cc90
-rw-r--r--source/use_case/ad/src/AdModel.cc55
-rw-r--r--source/use_case/ad/src/AdPostProcessing.cc116
-rw-r--r--source/use_case/ad/src/MainLoop.cc114
-rw-r--r--source/use_case/ad/src/MelSpectrogram.cc311
-rw-r--r--source/use_case/ad/src/UseCaseHandler.cc422
-rw-r--r--source/use_case/ad/usecase.cmake111
-rw-r--r--source/use_case/asr/include/AsrClassifier.hpp62
-rw-r--r--source/use_case/asr/include/AsrResult.hpp63
-rw-r--r--source/use_case/asr/include/OutputDecode.hpp40
-rw-r--r--source/use_case/asr/include/UseCaseHandler.hpp37
-rw-r--r--source/use_case/asr/include/Wav2LetterMfcc.hpp109
-rw-r--r--source/use_case/asr/include/Wav2LetterModel.hpp61
-rw-r--r--source/use_case/asr/include/Wav2LetterPostprocess.hpp109
-rw-r--r--source/use_case/asr/include/Wav2LetterPreprocess.hpp203
-rw-r--r--source/use_case/asr/src/AsrClassifier.cc130
-rw-r--r--source/use_case/asr/src/MainLoop.cc230
-rw-r--r--source/use_case/asr/src/OutputDecode.cc47
-rw-r--r--source/use_case/asr/src/UseCaseHandler.cc288
-rw-r--r--source/use_case/asr/src/Wav2LetterMfcc.cc137
-rw-r--r--source/use_case/asr/src/Wav2LetterModel.cc56
-rw-r--r--source/use_case/asr/src/Wav2LetterPostprocess.cc172
-rw-r--r--source/use_case/asr/src/Wav2LetterPreprocess.cc228
-rw-r--r--source/use_case/asr/usecase.cmake164
-rw-r--r--source/use_case/img_class/include/MobileNetModel.hpp55
-rw-r--r--source/use_case/img_class/include/UseCaseHandler.hpp37
-rw-r--r--source/use_case/img_class/src/MainLoop.cc109
-rw-r--r--source/use_case/img_class/src/MobileNetModel.cc57
-rw-r--r--source/use_case/img_class/src/UseCaseHandler.cc269
-rw-r--r--source/use_case/img_class/usecase.cmake125
-rw-r--r--source/use_case/inference_runner/include/TestModel.hpp47
-rw-r--r--source/use_case/inference_runner/include/UseCaseHandler.hpp35
-rw-r--r--source/use_case/inference_runner/src/MainLoop.cc51
-rw-r--r--source/use_case/inference_runner/src/TestModel.cc36
-rw-r--r--source/use_case/inference_runner/src/UseCaseHandler.cc88
-rw-r--r--source/use_case/inference_runner/usecase.cmake57
-rw-r--r--source/use_case/kws/include/DsCnnMfcc.hpp50
-rw-r--r--source/use_case/kws/include/DsCnnModel.hpp59
-rw-r--r--source/use_case/kws/include/KwsResult.hpp63
-rw-r--r--source/use_case/kws/include/UseCaseHandler.hpp37
-rw-r--r--source/use_case/kws/src/DsCnnModel.cc58
-rw-r--r--source/use_case/kws/src/MainLoop.cc112
-rw-r--r--source/use_case/kws/src/UseCaseHandler.cc452
-rw-r--r--source/use_case/kws/usecase.cmake159
-rw-r--r--source/use_case/kws_asr/include/AsrClassifier.hpp64
-rw-r--r--source/use_case/kws_asr/include/AsrResult.hpp63
-rw-r--r--source/use_case/kws_asr/include/DsCnnMfcc.hpp51
-rw-r--r--source/use_case/kws_asr/include/DsCnnModel.hpp67
-rw-r--r--source/use_case/kws_asr/include/KwsResult.hpp63
-rw-r--r--source/use_case/kws_asr/include/OutputDecode.hpp40
-rw-r--r--source/use_case/kws_asr/include/UseCaseHandler.hpp37
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterMfcc.hpp112
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterModel.hpp67
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp101
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp205
-rw-r--r--source/use_case/kws_asr/src/AsrClassifier.cc131
-rw-r--r--source/use_case/kws_asr/src/DsCnnModel.cc67
-rw-r--r--source/use_case/kws_asr/src/MainLoop.cc233
-rw-r--r--source/use_case/kws_asr/src/OutputDecode.cc47
-rw-r--r--source/use_case/kws_asr/src/UseCaseHandler.cc707
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterMfcc.cc137
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterModel.cc62
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterPostprocess.cc155
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterPreprocess.cc228
-rw-r--r--source/use_case/kws_asr/usecase.cmake259
70 files changed, 8673 insertions, 0 deletions
diff --git a/source/use_case/ad/include/AdMelSpectrogram.hpp b/source/use_case/ad/include/AdMelSpectrogram.hpp
new file mode 100644
index 0000000..cf8a1d4
--- /dev/null
+++ b/source/use_case/ad/include/AdMelSpectrogram.hpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ADMELSPECTROGRAM_HPP
+#define ADMELSPECTROGRAM_HPP
+
+#include "MelSpectrogram.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+    /**
+     * @brief   Mel Spectrogram calculation specialised for the anomaly
+     *          detection use case. Supplies fixed default parameters to the
+     *          generic MelSpectrogram base class and overrides three of its
+     *          calculation hooks.
+     */
+    class AdMelSpectrogram : public MelSpectrogram {
+
+    public:
+        /* Default parameters forwarded to MelSpecParams by the constructor. */
+        static constexpr uint32_t ms_defaultSamplingFreq = 16000;
+        static constexpr uint32_t ms_defaultNumFbankBins = 64;
+        static constexpr uint32_t ms_defaultMelLoFreq    = 0;
+        static constexpr uint32_t ms_defaultMelHiFreq    = 8000;
+        static constexpr bool     ms_defaultUseHtkMethod = false;
+
+        /* A frame length must always be supplied. */
+        AdMelSpectrogram() = delete;
+
+        /**
+         * @brief       Constructor
+         * @param[in]   frameLen   Frame length forwarded to MelSpecParams
+         *                         together with the defaults above.
+         */
+        explicit AdMelSpectrogram(const size_t frameLen)
+            : MelSpectrogram(MelSpecParams(
+                  ms_defaultSamplingFreq, ms_defaultNumFbankBins,
+                  ms_defaultMelLoFreq, ms_defaultMelHiFreq,
+                  frameLen, ms_defaultUseHtkMethod))
+        {}
+
+        ~AdMelSpectrogram() = default;
+
+    protected:
+
+        /**
+         * @brief       Overrides base class implementation of this function.
+         * @param[in]   fftVec                  Vector populated with FFT magnitudes
+         * @param[in]   melFilterBank           2D Vector with filter bank weights
+         * @param[in]   filterBankFilterFirst   Vector containing the first indices of filter bank
+         *                                      to be used for each bin.
+         * @param[in]   filterBankFilterLast    Vector containing the last indices of filter bank
+         *                                      to be used for each bin.
+         * @param[out]  melEnergies             Pre-allocated vector of MEL energies to be
+         *                                      populated.
+         * @return      true if successful, false otherwise
+         */
+        bool ApplyMelFilterBank(
+                std::vector<float>&                 fftVec,
+                std::vector<std::vector<float>>&    melFilterBank,
+                std::vector<int32_t>&               filterBankFilterFirst,
+                std::vector<int32_t>&               filterBankFilterLast,
+                std::vector<float>&                 melEnergies) override;
+
+        /**
+         * @brief           Override of the base class conversion of mel
+         *                  energies to logarithmic scale. Unlike the default
+         *                  behaviour, the power is converted to dB
+         *                  and subsequently clamped.
+         * @param[in,out]   melEnergies   1D vector of Mel energies
+         **/
+        void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override;
+
+        /**
+         * @brief       Given the low and high Mel values, get the normaliser
+         *              for weights to be applied when populating the filter
+         *              bank. Override for the base class implementation.
+         * @param[in]   leftMel        low Mel frequency value
+         * @param[in]   rightMel       high Mel frequency value
+         * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+         *                             used for calculation
+         * @return      float value to be applied when populating the
+         *              filter bank.
+         */
+        float GetMelFilterBankNormaliser(
+                const float&   leftMel,
+                const float&   rightMel,
+                const bool     useHTKMethod) override;
+    };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ADMELSPECTROGRAM_HPP */
diff --git a/source/use_case/ad/include/AdModel.hpp b/source/use_case/ad/include/AdModel.hpp
new file mode 100644
index 0000000..2d83455
--- /dev/null
+++ b/source/use_case/ad/include/AdModel.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AD_MODEL_HPP
+#define AD_MODEL_HPP
+
+#include "Model.hpp"
+
+/* Use-case configuration constants. Only declared here; their definitions
+ * (and values) live outside this header. */
+extern const int g_FrameLength;
+extern const int g_FrameStride;
+extern const float g_ScoreThreshold;
+extern const float g_TrainingMean;
+
+namespace arm {
+namespace app {
+
+ /* Model wrapper for the anomaly detection use case. Specialises the generic
+ * Model base class with the operator list and model data accessors. */
+ class AdModel : public Model {
+ protected:
+ /** @brief Gets the reference to op resolver interface class */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance */
+ bool EnlistOperations() override;
+
+ /** @brief Gets the pointer to the model data */
+ const uint8_t* ModelPointer() override;
+
+ /** @brief Gets the size of the model data (presumably in bytes - TODO confirm) */
+ size_t ModelSize() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted */
+ static constexpr int _ms_maxOpCnt = 6;
+
+ /* A mutable op resolver instance */
+ tflite::MicroMutableOpResolver<_ms_maxOpCnt> _m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* AD_MODEL_HPP */
diff --git a/source/use_case/ad/include/AdPostProcessing.hpp b/source/use_case/ad/include/AdPostProcessing.hpp
new file mode 100644
index 0000000..f3b35a1
--- /dev/null
+++ b/source/use_case/ad/include/AdPostProcessing.hpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ADPOSTPROCESSING_HPP
+#define ADPOSTPROCESSING_HPP
+
+#include "TensorFlowLiteMicro.hpp"
+
+#include <vector>
+
+namespace arm {
+namespace app {
+
+ /** @brief Dequantize TensorFlow Lite Micro tensor.
+ * @tparam T Element type stored in the tensor (instantiated for uint8_t and int8_t).
+ * @param[in] tensor Pointer to the TensorFlow Lite Micro tensor to be dequantized.
+ * @return Vector with the dequantized tensor values; empty if tensor is a null pointer.
+ **/
+ template<typename T>
+ std::vector<float> Dequantize(TfLiteTensor* tensor);
+
+ /** @brief Calculates the softmax of vector in place.
+ * @param[in,out] inputVector Values to transform; overwritten with the softmax result.
+ **/
+ void Softmax(std::vector<float>& inputVector);
+
+
+ /** @brief Given a wav file name return AD model output index.
+ * @param[in] wavFileName Audio WAV filename.
+ * File name should be in format <anything>_<goes>_XX_<here>.wav
+ * where XX is the machine ID e.g. 00, 02, 04 or 06
+ * @return AD model output index as 8 bit integer, or -1 if the machine ID is not valid.
+ **/
+ int8_t OutputIndexFromFileName(std::string wavFileName);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ADPOSTPROCESSING_HPP */
diff --git a/source/use_case/ad/include/MelSpectrogram.hpp b/source/use_case/ad/include/MelSpectrogram.hpp
new file mode 100644
index 0000000..c1dd61e
--- /dev/null
+++ b/source/use_case/ad/include/MelSpectrogram.hpp
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MELSPECTROGRAM_HPP
+#define MELSPECTROGRAM_HPP
+
+#include "PlatformMath.hpp"
+
+#include <vector>
+#include <cstdint>
+#include <cmath>
+#include <limits>
+#include <string>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* Mel Spectrogram consolidated parameters */
+ class MelSpecParams {
+ public:
+ /* Sampling frequency of the audio (units presumably Hz - TODO confirm). */
+ float m_samplingFreq;
+ /* Number of Mel filter bank bins. */
+ uint32_t m_numFbankBins;
+ /* Lower and upper frequency bounds of the Mel filter bank. */
+ float m_melLoFreq;
+ float m_melHiFreq;
+ /* Frame length as passed to the constructor. */
+ uint32_t m_frameLen;
+ /* Padded frame length; not a constructor argument, so it is derived in the
+ * constructor (defined outside this header) - TODO confirm how. */
+ uint32_t m_frameLenPadded;
+ /* Selects the HTK method for the Mel scale calculations. */
+ bool m_useHtkMethod;
+
+ /** @brief Constructor */
+ MelSpecParams(const float samplingFreq, const uint32_t numFbankBins,
+ const float melLoFreq, const float melHiFreq,
+ const uint32_t frameLen, const bool useHtkMethod);
+
+ /* Parameters must always be given explicitly. */
+ MelSpecParams() = delete;
+ ~MelSpecParams() = default;
+
+ /** @brief String representation of parameters */
+ std::string Str();
+ };
+
+ /**
+ * @brief Class for Mel Spectrogram feature extraction.
+ * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
+ * This class is designed to be generic and self-sufficient but
+ * certain calculation routines can be overridden to accommodate
+ * use-case specific requirements.
+ */
+    class MelSpectrogram {
+
+    public:
+        /**
+         * @brief       Extract Mel Spectrogram for one single small frame of
+         *              audio data e.g. 640 samples.
+         * @param[in]   audioData      Vector of audio samples to calculate
+         *                             features for.
+         * @param[in]   trainingMean   Value to subtract from the computed
+         *                             mel spectrogram, default 0.
+         * @return      Vector of extracted Mel Spectrogram features.
+         **/
+        std::vector<float> ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean = 0);
+
+        /**
+         * @brief       Constructor
+         * @param[in]   params   Mel Spectrogram parameters
+         */
+        MelSpectrogram(const MelSpecParams& params);
+
+        MelSpectrogram() = delete;
+
+        /* Virtual because this class is a polymorphic base: it declares
+         * virtual hooks that use-case specific classes override, so a
+         * derived object must be safely destructible through a base
+         * pointer or reference. */
+        virtual ~MelSpectrogram() = default;
+
+        /** @brief  Initialise */
+        void Init();
+
+        /**
+         * @brief       Extract Mel Spectrogram features and quantise for one single small
+         *              frame of audio data e.g. 640 samples.
+         * @tparam      T              Quantised output element type.
+         * @param[in]   audioData      Vector of audio samples to calculate
+         *                             features for.
+         * @param[in]   quantScale     quantisation scale.
+         * @param[in]   quantOffset    quantisation offset
+         * @param[in]   trainingMean   Value forwarded to ComputeMelSpec, default 0.
+         * @return      Vector of extracted quantised Mel Spectrogram features.
+         **/
+        template<typename T>
+        std::vector<T> MelSpecComputeQuant(const std::vector<int16_t>& audioData,
+                                           const float quantScale,
+                                           const int quantOffset,
+                                           float trainingMean = 0)
+        {
+            /* Populates _m_melEnergies; the returned copy is not needed here. */
+            this->ComputeMelSpec(audioData, trainingMean);
+
+            /* lowest() is the most negative representable value for every
+             * arithmetic T. It equals min() for integral types, whereas
+             * min() would be the smallest positive value for floating types. */
+            const float minVal = std::numeric_limits<T>::lowest();
+            const float maxVal = std::numeric_limits<T>::max();
+
+            const size_t numFbankBins = this->_m_params.m_numFbankBins;
+            std::vector<T> melSpecOut(numFbankBins);
+
+            /* Quantize to T: scale, shift, round, then clamp to T's range. */
+            for (size_t k = 0; k < numFbankBins; ++k) {
+                auto quantizedEnergy = std::round(((this->_m_melEnergies[k]) / quantScale) + quantOffset);
+                melSpecOut[k] = static_cast<T>(std::min<float>(std::max<float>(quantizedEnergy, minVal), maxVal));
+            }
+
+            return melSpecOut;
+        }
+
+        /* Constants */
+        static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
+        static constexpr float ms_freqStep = 200.0 / 3;
+        static constexpr float ms_minLogHz = 1000.0;
+        static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
+
+    protected:
+        /**
+         * @brief       Project input frequency to Mel Scale.
+         * @param[in]   freq           input frequency in floating point
+         * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+         *                             used for calculation
+         * @return      Mel transformed frequency in floating point
+         **/
+        static float MelScale(const float freq,
+                              const bool useHTKMethod = true);
+
+        /**
+         * @brief       Inverse Mel transform - convert MEL warped frequency
+         *              back to normal frequency
+         * @param[in]   melFreq        Mel frequency in floating point
+         * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+         *                             used for calculation
+         * @return      Real world frequency in floating point
+         **/
+        static float InverseMelScale(const float melFreq,
+                                     const bool useHTKMethod = true);
+
+        /**
+         * @brief       Populates MEL energies after applying the MEL filter
+         *              bank weights and adding them up to be placed into
+         *              bins, according to the filter bank's first and last
+         *              indices (pre-computed for each filter bank element
+         *              by _CreateMelFilterBank function).
+         * @param[in]   fftVec                  Vector populated with FFT magnitudes
+         * @param[in]   melFilterBank           2D Vector with filter bank weights
+         * @param[in]   filterBankFilterFirst   Vector containing the first indices of filter bank
+         *                                      to be used for each bin.
+         * @param[in]   filterBankFilterLast    Vector containing the last indices of filter bank
+         *                                      to be used for each bin.
+         * @param[out]  melEnergies             Pre-allocated vector of MEL energies to be
+         *                                      populated.
+         * @return      true if successful, false otherwise
+         */
+        virtual bool ApplyMelFilterBank(
+                std::vector<float>&                 fftVec,
+                std::vector<std::vector<float>>&    melFilterBank,
+                std::vector<int32_t>&               filterBankFilterFirst,
+                std::vector<int32_t>&               filterBankFilterLast,
+                std::vector<float>&                 melEnergies);
+
+        /**
+         * @brief           Converts the Mel energies for logarithmic scale
+         * @param[in,out]   melEnergies   1D vector of Mel energies
+         **/
+        virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
+
+        /**
+         * @brief       Given the low and high Mel values, get the normaliser
+         *              for weights to be applied when populating the filter
+         *              bank.
+         * @param[in]   leftMel        low Mel frequency value
+         * @param[in]   rightMel       high Mel frequency value
+         * @param[in]   useHTKMethod   bool to signal if HTK method is to be
+         *                             used for calculation
+         * @return      float value to be applied when populating the
+         *              filter bank.
+         */
+        virtual float GetMelFilterBankNormaliser(
+                const float&   leftMel,
+                const float&   rightMel,
+                const bool     useHTKMethod);
+
+    private:
+        MelSpecParams                   _m_params;
+        std::vector<float>              _m_frame;
+        std::vector<float>              _m_buffer;
+        std::vector<float>              _m_melEnergies;
+        std::vector<float>              _m_windowFunc;
+        std::vector<std::vector<float>> _m_melFilterBank;
+        std::vector<int32_t>            _m_filterBankFilterFirst;
+        std::vector<int32_t>            _m_filterBankFilterLast;
+        bool                            _m_filterBankInitialised;
+        arm::app::math::FftInstance     _m_fftInstance;
+
+        /**
+         * @brief       Initialises the filter banks.
+         **/
+        void _InitMelFilterBank();
+
+        /**
+         * @brief       Signals whether the instance of MelSpectrogram has had its
+         *              required buffers initialised
+         * @return      True if initialised, false otherwise
+         **/
+        bool _IsMelFilterBankInited();
+
+        /**
+         * @brief       Create mel filter banks for Mel Spectrogram calculation.
+         * @return      2D vector of floats
+         **/
+        std::vector<std::vector<float>> _CreateMelFilterBank();
+
+        /**
+         * @brief       Computes the magnitude from an interleaved complex array
+         **/
+        void _ConvertToPowerSpectrum();
+
+    };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+
+#endif /* MELSPECTROGRAM_HPP */
diff --git a/source/use_case/ad/include/UseCaseHandler.hpp b/source/use_case/ad/include/UseCaseHandler.hpp
new file mode 100644
index 0000000..b62b36d
--- /dev/null
+++ b/source/use_case/ad/include/UseCaseHandler.hpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AD_EVT_HANDLER_H
+#define AD_EVT_HANDLER_H
+#include "AppContext.hpp"
+
+namespace arm {
+namespace app {
+ /**
+ * @brief Handles the inference event
+ * @param[in] ctx reference to the application context
+ * @param[in] dataIndex index to the input data to classify
+ * @param[in] runAll flag to request classification of all the available audio clips
+ * @return True or false based on execution success
+ **/
+ bool ClassifyVibrationHandler(ApplicationContext& ctx, uint32_t dataIndex, bool runAll);
+} /* namespace app */
+} /* namespace arm */
+#endif /* AD_EVT_HANDLER_H */ \ No newline at end of file
diff --git a/source/use_case/ad/src/AdMelSpectrogram.cc b/source/use_case/ad/src/AdMelSpectrogram.cc
new file mode 100644
index 0000000..183c05c
--- /dev/null
+++ b/source/use_case/ad/src/AdMelSpectrogram.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdMelSpectrogram.hpp"
+
+#include "PlatformMath.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+    /**
+     * @brief       Sums the weighted FFT values of each filter bank bin into
+     *              one Mel energy per bin.
+     * @param[in]   fftVec                  Vector populated with FFT magnitudes
+     * @param[in]   melFilterBank           2D Vector with filter bank weights
+     * @param[in]   filterBankFilterFirst   First FFT index used by each bin
+     * @param[in]   filterBankFilterLast    Last FFT index used by each bin
+     * @param[out]  melEnergies             Pre-allocated vector of MEL energies;
+     *                                      its size defines the number of bins.
+     * @return      true if successful, false if the per-bin containers do not
+     *              cover every bin.
+     */
+    bool AdMelSpectrogram::ApplyMelFilterBank(
+            std::vector<float>&                 fftVec,
+            std::vector<std::vector<float>>&    melFilterBank,
+            std::vector<int32_t>&               filterBankFilterFirst,
+            std::vector<int32_t>&               filterBankFilterLast,
+            std::vector<float>&                 melEnergies)
+    {
+        const size_t numBanks = melEnergies.size();
+
+        /* All three per-bin containers are indexed by bin below, so each one
+         * must provide at least numBanks entries. */
+        if (numBanks != filterBankFilterFirst.size() ||
+            numBanks != filterBankFilterLast.size() ||
+            numBanks > melFilterBank.size()) {
+            printf_err("unexpected filter bank lengths\n");
+            return false;
+        }
+
+        for (size_t bin = 0; bin < numBanks; ++bin) {
+            auto filterBankIter = melFilterBank[bin].begin();
+            float melEnergy = 1e-10; /* Avoid log of zero at later stages. */
+            const int32_t firstIndex = filterBankFilterFirst[bin];
+            const int32_t lastIndex = filterBankFilterLast[bin];
+
+            /* Weights are stored from firstIndex onwards, so the weight
+             * iterator advances in step with the FFT index. */
+            for (int32_t i = firstIndex; i <= lastIndex; ++i) {
+                melEnergy += (*filterBankIter++ * fftVec[i]);
+            }
+
+            melEnergies[bin] = melEnergy;
+        }
+
+        return true;
+    }
+
+    /**
+     * @brief           Replaces each Mel energy with 10 * log10 of its value.
+     * @param[in,out]   melEnergies   1D vector of Mel energies
+     */
+    void AdMelSpectrogram::ConvertToLogarithmicScale(
+            std::vector<float>& melEnergies)
+    {
+        /* The helper below yields natural logarithms; multiplying by
+         * log10(e) converts them to base 10 and the factor of 10 is the
+         * default scalar applied on top. */
+        constexpr float multiplier = 10.0 *              /* default scalar */
+                                     0.4342944819032518; /* log10f(std::exp(1.0))*/
+
+        /* Natural logarithm of every energy, computed over the whole vector. */
+        std::vector<float> naturalLogs(melEnergies.size(), 0.f);
+        math::MathUtils::VecLogarithmF32(melEnergies, naturalLogs);
+
+        /* Write the scaled logarithms back over the input energies. */
+        const size_t numEnergies = melEnergies.size();
+        for (size_t idx = 0; idx < numEnergies; ++idx) {
+            melEnergies[idx] = naturalLogs[idx] * multiplier;
+        }
+    }
+
+    /**
+     * @brief       Slaney normalisation for mel weights: 2 divided by the
+     *              width of the band once both Mel values are mapped back
+     *              through InverseMelScale.
+     * @param[in]   leftMel        low Mel frequency value
+     * @param[in]   rightMel       high Mel frequency value
+     * @param[in]   useHTKMethod   forwarded to InverseMelScale
+     * @return      Normaliser to apply when populating the filter bank.
+     */
+    float AdMelSpectrogram::GetMelFilterBankNormaliser(
+            const float&   leftMel,
+            const float&   rightMel,
+            const bool     useHTKMethod)
+    {
+        const float rightFreq = AdMelSpectrogram::InverseMelScale(rightMel, useHTKMethod);
+        const float leftFreq  = AdMelSpectrogram::InverseMelScale(leftMel, useHTKMethod);
+
+        return (2.0f / (rightFreq - leftFreq));
+    }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/ad/src/AdModel.cc b/source/use_case/ad/src/AdModel.cc
new file mode 100644
index 0000000..148bc98
--- /dev/null
+++ b/source/use_case/ad/src/AdModel.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdModel.hpp"
+
+#include "hal.h"
+
+/* Exposes the op resolver member through the MicroOpResolver interface. */
+const tflite::MicroOpResolver& arm::app::AdModel::GetOpResolver()
+{
+    const tflite::MicroOpResolver& resolver = this->_m_opResolver;
+    return resolver;
+}
+
+/**
+ * Registers the operators used by the model with the op resolver and,
+ * when built with ARM_NPU defined, the Ethos-U operator as well.
+ *
+ * @return false only if adding the Ethos-U operator fails, true otherwise.
+ */
+bool arm::app::AdModel::EnlistOperations()
+{
+    this->_m_opResolver.AddAveragePool2D();
+    this->_m_opResolver.AddConv2D();
+    this->_m_opResolver.AddDepthwiseConv2D();
+    this->_m_opResolver.AddRelu6();
+    this->_m_opResolver.AddReshape();
+
+#if defined(ARM_NPU)
+    if (kTfLiteOk == this->_m_opResolver.AddEthosU()) {
+        info("Added %s support to op resolver\n",
+            tflite::GetString_ETHOSU());
+    } else {
+        /* Newline-terminated like the other error messages in this use case. */
+        printf_err("Failed to add Arm NPU support to op resolver.\n");
+        return false;
+    }
+#endif /* ARM_NPU */
+    return true;
+}
+
+/* Defined outside this file. */
+extern uint8_t* GetModelPointer();
+
+/* Returns the model data pointer obtained from GetModelPointer(). */
+const uint8_t* arm::app::AdModel::ModelPointer()
+{
+    const uint8_t* const modelData = GetModelPointer();
+    return modelData;
+}
+/* Defined outside this file. */
+extern size_t GetModelLen();
+
+/* Returns the model length obtained from GetModelLen(). */
+size_t arm::app::AdModel::ModelSize()
+{
+    const size_t modelLen = GetModelLen();
+    return modelLen;
+}
diff --git a/source/use_case/ad/src/AdPostProcessing.cc b/source/use_case/ad/src/AdPostProcessing.cc
new file mode 100644
index 0000000..157784b
--- /dev/null
+++ b/source/use_case/ad/src/AdPostProcessing.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdPostProcessing.hpp"
+
+#include "hal.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cmath>
+#include <numeric>
+#include <string>
+
+namespace arm {
+namespace app {
+
+    /**
+     * @brief       Converts every element of a quantised tensor to float using
+     *              the tensor's quantisation scale and offset.
+     * @tparam      T        Element type stored in the tensor.
+     * @param[in]   tensor   Tensor to dequantize.
+     * @return      Dequantized values, or an empty vector if tensor is null.
+     */
+    template<typename T>
+    std::vector<float> Dequantize(TfLiteTensor* tensor) {
+
+        std::vector<float> dequantizedOutput;
+
+        if (nullptr == tensor) {
+            printf_err("Tensor is null pointer can not dequantize.\n");
+            return dequantizedOutput;
+        }
+
+        const T* rawData = tflite::GetTensorData<T>(tensor);
+
+        /* The element count is the product of all tensor dimensions. */
+        uint32_t elementCount = 1;
+        const auto* shape = tensor->dims;
+        for (int dimIdx = 0; dimIdx < shape->size; ++dimIdx) {
+            elementCount *= shape->data[dimIdx];
+        }
+
+        /* For getting the floating point values, we need quantization parameters */
+        const QuantParams quantParams = GetTensorQuantParams(tensor);
+
+        dequantizedOutput.resize(elementCount);
+        for (uint32_t idx = 0; idx < elementCount; ++idx) {
+            dequantizedOutput[idx] = quantParams.scale * (rawData[idx] - quantParams.offset);
+        }
+
+        return dequantizedOutput;
+    }
+
+    /**
+     * @brief           Calculates the softmax of a vector in place.
+     * @param[in,out]   inputVector   Values to transform. An empty vector is
+     *                                left untouched.
+     */
+    void Softmax(std::vector<float>& inputVector) {
+        /* max_element on an empty range returns end(), which must not be
+         * dereferenced, so there is nothing to do for empty input. */
+        if (inputVector.empty()) {
+            return;
+        }
+
+        auto start = inputVector.begin();
+        auto end = inputVector.end();
+
+        /* Fix for numerical stability and apply exp: subtracting the maximum
+         * keeps every exponent at or below zero. */
+        const float maxValue = *std::max_element(start, end);
+        for (auto it = start; it != end; ++it) {
+            *it = std::exp((*it) - maxValue);
+        }
+
+        /* The maximum element contributes exp(0) == 1, so the sum is never zero. */
+        const float sumExp = std::accumulate(start, end, 0.0f);
+
+        for (auto it = start; it != end; ++it) {
+            *it = (*it) / sumExp;
+        }
+    }
+
+    /**
+     * @brief       Given a wav file name return AD model output index.
+     * @param[in]   wavFileName   Audio WAV filename, assumed to be in the form
+     *                            machine_id_00.wav: the machine ID is the
+     *                            third "_" separated field, with anything
+     *                            from the first "." onwards ignored.
+     * @return      0, 1, 2 or 3 for machine IDs 0, 2, 4 and 6 respectively;
+     *              -1 for anything else.
+     */
+    int8_t OutputIndexFromFileName(std::string wavFileName) {
+        const std::string fieldDelimiter = "_";   /* Splits the file name into fields. */
+        constexpr size_t machineIdxInString = 3;  /* Which field holds the machine id. */
+
+        std::string subString;
+        for (size_t i = 0; i < machineIdxInString; ++i) {
+            const size_t delimiterStart = wavFileName.find(fieldDelimiter);
+            if (delimiterStart == std::string::npos) {
+                /* No further delimiter: what is left is the last field. */
+                subString = wavFileName;
+                break;
+            }
+            subString = wavFileName.substr(0, delimiterStart);
+            wavFileName.erase(0, delimiterStart + fieldDelimiter.length());
+        }
+
+        /* At this point subString may still carry the extension, e.g. 00.wav */
+        const size_t extensionStart = subString.find('.');
+        if (extensionStart != std::string::npos) {
+            subString.erase(extensionStart);
+        }
+
+        /* std::isdigit requires a value representable as unsigned char. */
+        const bool isNumber = !subString.empty() &&
+            std::all_of(subString.begin(), subString.end(),
+                        [](const unsigned char c) { return std::isdigit(c) != 0; });
+
+        /* Accumulate the value by hand instead of using std::stoi: stoi throws
+         * on very long digit runs, and narrowing its result to int8_t would
+         * make e.g. 256 alias machine 0. Once the saturation limit is reached
+         * the value stops growing, which keeps it within int range and still
+         * far outside the valid machine IDs. */
+        constexpr int saturationLimit = 100000000;
+        int machineIdx = -1;
+        if (isNumber) {
+            machineIdx = 0;
+            for (const char digit : subString) {
+                if (machineIdx < saturationLimit) {
+                    machineIdx = (machineIdx * 10) + (digit - '0');
+                }
+            }
+        }
+
+        /* Return corresponding index in the output vector. */
+        switch (machineIdx) {
+            case 0:
+                return 0;
+            case 2:
+                return 1;
+            case 4:
+                return 2;
+            case 6:
+                return 3;
+            default:
+                printf_err("%d is an invalid machine index \n", machineIdx);
+                return -1;
+        }
+    }
+
+ /* Explicit instantiations: the Dequantize template is defined in this file,
+ * so the element types it is instantiated for are listed here. */
+ template std::vector<float> Dequantize<uint8_t>(TfLiteTensor* tensor);
+ template std::vector<float> Dequantize<int8_t>(TfLiteTensor* tensor);
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/ad/src/MainLoop.cc b/source/use_case/ad/src/MainLoop.cc
new file mode 100644
index 0000000..5455b43
--- /dev/null
+++ b/source/use_case/ad/src/MainLoop.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hal.h" /* Brings in platform definitions */
+#include "InputFiles.hpp" /* For input data */
+#include "AdModel.hpp" /* Model class for running inference */
+#include "UseCaseCommonUtils.hpp" /* Utils functions */
+#include "UseCaseHandler.hpp" /* Handlers for different user options */
+
+/* Menu choices offered to the user; values match the numbers shown by DisplayMenu(). */
+enum opcodes
+{
+    MENU_OPT_RUN_INF_NEXT     = 1, /* Run on next vector */
+    MENU_OPT_RUN_INF_CHOSEN   = 2, /* Run on a user provided vector index */
+    MENU_OPT_RUN_INF_ALL      = 3, /* Run inference on all */
+    MENU_OPT_SHOW_MODEL_INFO  = 4, /* Show model info */
+    MENU_OPT_LIST_AUDIO_CLIPS = 5  /* List the current baked audio signals */
+};
+
+/**
+ * @brief   Prints the interactive menu with one numbered entry per opcode.
+ *
+ * The enumerators are cast to unsigned int explicitly so that the argument
+ * type always matches the %u conversion, whatever underlying type the
+ * compiler picks for the enum.
+ **/
+static void DisplayMenu()
+{
+    printf("\n\nUser input required\n");
+    printf("Enter option number from:\n\n");
+    printf(" %u. Classify next audio signal\n",
+           static_cast<unsigned int>(MENU_OPT_RUN_INF_NEXT));
+    printf(" %u. Classify audio signal at chosen index\n",
+           static_cast<unsigned int>(MENU_OPT_RUN_INF_CHOSEN));
+    printf(" %u. Run classification on all audio signals\n",
+           static_cast<unsigned int>(MENU_OPT_RUN_INF_ALL));
+    printf(" %u. Show NN model info\n",
+           static_cast<unsigned int>(MENU_OPT_SHOW_MODEL_INFO));
+    printf(" %u. List audio signals\n\n",
+           static_cast<unsigned int>(MENU_OPT_LIST_AUDIO_CLIPS));
+    printf(" Choice: ");
+}
+
+
+/**
+ * @brief       Application entry point for the anomaly detection use case.
+ *
+ * Initialises the model, fills the application context with the platform,
+ * the model and the audio parameters, then serves menu requests until a
+ * handler reports failure. When only one input file is baked in, no menu is
+ * shown and a single inference is run.
+ *
+ * @param[in]   platform   Reference to the hal platform object.
+ **/
+void main_loop(hal_platform& platform)
+{
+    /* Model wrapper object; nothing can run unless it loads. */
+    arm::app::AdModel model;
+    if (!model.Init()) {
+        printf_err("failed to initialise model\n");
+        return;
+    }
+
+    /* Application context shared with the use case handlers. */
+    arm::app::ApplicationContext caseContext;
+    caseContext.Set<hal_platform&>("platform", platform);
+    caseContext.Set<arm::app::Model&>("model", model);
+    caseContext.Set<uint32_t>("clipIndex", 0);
+    caseContext.Set<int>("frameLength", g_FrameLength);
+    caseContext.Set<int>("frameStride", g_FrameStride);
+    caseContext.Set<float>("scoreThreshold", g_ScoreThreshold);
+    caseContext.Set<float>("trainingMean", g_TrainingMean);
+
+    /* The menu is only useful when there is more than one file to choose from. */
+    constexpr bool bUseMenu = (NUMBER_OF_FILES > 1);
+    bool executionSuccessful = true;
+
+    do {
+        int menuOption = MENU_OPT_RUN_INF_NEXT;
+        if (bUseMenu) {
+            DisplayMenu();
+            menuOption = arm::app::ReadUserInputAsInt(platform);
+            printf("\n");
+        }
+
+        if (menuOption == MENU_OPT_RUN_INF_NEXT || menuOption == MENU_OPT_RUN_INF_ALL) {
+            /* Both options start from the current clip; they differ only in runAll. */
+            const bool runAll = (menuOption == MENU_OPT_RUN_INF_ALL);
+            executionSuccessful = ClassifyVibrationHandler(
+                    caseContext,
+                    caseContext.Get<uint32_t>("clipIndex"),
+                    runAll);
+        } else if (menuOption == MENU_OPT_RUN_INF_CHOSEN) {
+            printf(" Enter the data index [0, %d]: ",
+                   NUMBER_OF_FILES-1);
+            const auto audioIndex = static_cast<uint32_t>(
+                    arm::app::ReadUserInputAsInt(platform));
+            executionSuccessful = ClassifyVibrationHandler(caseContext,
+                                                           audioIndex,
+                                                           false);
+        } else if (menuOption == MENU_OPT_SHOW_MODEL_INFO) {
+            executionSuccessful = model.ShowModelInfoHandler();
+        } else if (menuOption == MENU_OPT_LIST_AUDIO_CLIPS) {
+            executionSuccessful = ListFilesHandler(caseContext);
+        } else {
+            printf("Incorrect choice, try again.");
+        }
+    } while (executionSuccessful && bUseMenu);
+
+    info("Main loop terminated.\n");
+}
diff --git a/source/use_case/ad/src/MelSpectrogram.cc b/source/use_case/ad/src/MelSpectrogram.cc
new file mode 100644
index 0000000..86d57e6
--- /dev/null
+++ b/source/use_case/ad/src/MelSpectrogram.cc
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "MelSpectrogram.hpp"
+
+#include "PlatformMath.hpp"
+
+#include <cfloat>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ MelSpecParams::MelSpecParams(
+ const float samplingFreq,
+ const uint32_t numFbankBins,
+ const float melLoFreq,
+ const float melHiFreq,
+ const uint32_t frameLen,
+ const bool useHtkMethod):
+ m_samplingFreq(samplingFreq),
+ m_numFbankBins(numFbankBins),
+ m_melLoFreq(melLoFreq),
+ m_melHiFreq(melHiFreq),
+ m_frameLen(frameLen),
+
+ /* Smallest power of 2 >= frame length. */
+ m_frameLenPadded(pow(2, ceil((log(frameLen)/log(2))))),
+ m_useHtkMethod(useHtkMethod)
+ {}
+
+ /**
+ * @brief Formats all parameters into a human readable, multi-line string.
+ * @return String listing sampling frequency, number of filter banks, mel
+ * frequency limits, frame length, padded frame length and whether
+ * the HTK mel scale is used.
+ **/
+ std::string MelSpecParams::Str()
+ {
+ /* Fixed size scratch buffer; snprintf truncates anything longer. */
+ char strC[1024];
+ snprintf(strC, sizeof(strC) - 1, "\n \
+ \n\t Sampling frequency: %f\
+ \n\t Number of filter banks: %u\
+ \n\t Mel frequency limit (low): %f\
+ \n\t Mel frequency limit (high): %f\
+ \n\t Frame length: %u\
+ \n\t Padded frame length: %u\
+ \n\t Using HTK for Mel scale: %s\n",
+ this->m_samplingFreq, this->m_numFbankBins, this->m_melLoFreq,
+ this->m_melHiFreq, this->m_frameLen,
+ this->m_frameLenPadded, this->m_useHtkMethod ? "yes" : "no");
+ return std::string{strC};
+ }
+
+    /**
+     * @brief       Allocates the working buffers, pre-computes the window
+     *              function and initialises the FFT instance.
+     * @param[in]   params   Mel spectrogram parameters (copied into the object).
+     **/
+    MelSpectrogram::MelSpectrogram(const MelSpecParams& params):
+        _m_params(params),
+        _m_filterBankInitialised(false)
+    {
+        const auto paddedLen = this->_m_params.m_frameLenPadded;
+        const auto frameLen  = this->_m_params.m_frameLen;
+
+        /* Zero-initialised working storage. */
+        this->_m_buffer.assign(paddedLen, 0.0);
+        this->_m_frame.assign(paddedLen, 0.0);
+        this->_m_melEnergies.assign(this->_m_params.m_numFbankBins, 0.0);
+        this->_m_windowFunc.assign(frameLen, 0.0f);
+
+        /* Window coefficients: 0.5 - 0.5 * cos(2 * pi * i / frameLen). */
+        const float multiplier = 2 * M_PI / frameLen;
+        size_t sampleIdx = 0;
+        for (float& coefficient : this->_m_windowFunc) {
+            coefficient = (0.5 - (0.5 *
+                math::MathUtils::CosineF32(static_cast<float>(sampleIdx) * multiplier)));
+            ++sampleIdx;
+        }
+
+        math::MathUtils::FftInitF32(paddedLen, this->_m_fftInstance);
+        debug("Instantiated Mel Spectrogram object: %s\n", this->_m_params.Str().c_str());
+    }
+
+ /* Public initialisation entry point: creates the mel filter bank if it has
+  * not been created yet. */
+ void MelSpectrogram::Init()
+ {
+ this->_InitMelFilterBank();
+ }
+
+    /**
+     * @brief       Converts a frequency to the mel scale.
+     * @param[in]   freq           Frequency to convert.
+     * @param[in]   useHTKMethod   true for the HTK formula, false for Slaney's.
+     * @return      Mel value for the given frequency.
+     **/
+    float MelSpectrogram::MelScale(const float freq, const bool useHTKMethod)
+    {
+        if (useHTKMethod) {
+            return 1127.0f * logf (1.0f + freq / 700.0f);
+        }
+
+        /* Slaney formula: logarithmic at and above ms_minLogHz, linear below. */
+        if (freq >= ms_minLogHz) {
+            return ms_minLogMel + logf(freq / ms_minLogHz) / ms_logStep;
+        }
+        return freq / ms_freqStep;
+    }
+
+    /**
+     * @brief       Converts a mel scale value back to a frequency.
+     * @param[in]   melFreq        Mel value to convert.
+     * @param[in]   useHTKMethod   true for the HTK formula, false for Slaney's.
+     * @return      Frequency for the given mel value.
+     **/
+    float MelSpectrogram::InverseMelScale(const float melFreq, const bool useHTKMethod)
+    {
+        if (useHTKMethod) {
+            return 700.0f * (expf (melFreq / 1127.0f) - 1.0f);
+        }
+
+        /* Slaney formula: exponential at and above ms_minLogMel, linear below. */
+        if (melFreq >= ms_minLogMel) {
+            return ms_minLogHz * expf(ms_logStep * (melFreq - ms_minLogMel));
+        }
+        return ms_freqStep * melFreq;
+    }
+
+    /**
+     * @brief       Applies the mel filter bank to the given spectrum.
+     *
+     * For every filter, the square roots of the spectrum values in
+     * [first, last] are weighted by the filter coefficients and summed. Each
+     * sum starts from FLT_MIN so that a later logarithm never sees zero.
+     * Filters with an empty bin range (negative first index, or last index
+     * below first) contribute FLT_MIN only.
+     *
+     * @param[in]   fftVec                  Spectrum values, indexed by FFT bin.
+     * @param[in]   melFilterBank           Per-filter weights, starting at the filter's first bin.
+     * @param[in]   filterBankFilterFirst   First FFT bin index of each filter.
+     * @param[in]   filterBankFilterLast    Last FFT bin index of each filter.
+     * @param[out]  melEnergies             One energy value per filter.
+     * @return      true if successful, false if the inputs are inconsistent.
+     **/
+    bool MelSpectrogram::ApplyMelFilterBank(
+            std::vector<float>&                 fftVec,
+            std::vector<std::vector<float>>&    melFilterBank,
+            std::vector<int32_t>&               filterBankFilterFirst,
+            std::vector<int32_t>&               filterBankFilterLast,
+            std::vector<float>&                 melEnergies)
+    {
+        const size_t numBanks = melEnergies.size();
+
+        if (numBanks != filterBankFilterFirst.size() ||
+            numBanks != filterBankFilterLast.size() ||
+            numBanks > melFilterBank.size()) {
+            printf_err("unexpected filter bank lengths\n");
+            return false;
+        }
+
+        for (size_t bin = 0; bin < numBanks; ++bin) {
+            float melEnergy = FLT_MIN;  /* Avoid log of zero at later stages */
+            const int32_t firstIndex = filterBankFilterFirst[bin];
+            const int32_t lastIndex = filterBankFilterLast[bin];
+
+            /* Only accumulate over a non-empty, non-negative bin range. */
+            if (firstIndex >= 0 && lastIndex >= firstIndex) {
+                const std::vector<float>& weights = melFilterBank[bin];
+                const size_t numWeights = static_cast<size_t>(lastIndex - firstIndex) + 1;
+
+                if (static_cast<size_t>(lastIndex) >= fftVec.size() ||
+                    weights.size() < numWeights) {
+                    printf_err("unexpected filter bank lengths\n");
+                    return false;
+                }
+
+                for (size_t j = 0; j < numWeights; ++j) {
+                    const float energyRep = math::MathUtils::SqrtF32(fftVec[firstIndex + j]);
+                    melEnergy += (weights[j] * energyRep);
+                }
+            }
+
+            melEnergies[bin] = melEnergy;
+        }
+
+        return true;
+    }
+
+    /**
+     * @brief           Replaces every mel energy with its natural logarithm.
+     * @param[in,out]   melEnergies   Energies to convert in place.
+     **/
+    void MelSpectrogram::ConvertToLogarithmicScale(std::vector<float>& melEnergies)
+    {
+        for (float& energy : melEnergies) {
+            energy = logf(energy);
+        }
+    }
+
+ /**
+ * @brief Converts the FFT output held in _m_buffer to squared magnitudes, in place.
+ *
+ * The first two buffer entries are squared separately before the bulk
+ * conversion and written back afterwards to index 0 and index
+ * (padded frame length / 2).
+ * NOTE(review): presumably the FFT output packs the two purely real bins
+ * (first and last) into elements 0 and 1 - confirm against FftF32.
+ **/
+ void MelSpectrogram::_ConvertToPowerSpectrum()
+ {
+ const uint32_t halfDim = this->_m_params.m_frameLenPadded / 2;
+
+ /* Handle this special case. */
+ /* Must be captured before the in-place conversion below overwrites the buffer. */
+ float firstEnergy = this->_m_buffer[0] * this->_m_buffer[0];
+ float lastEnergy = this->_m_buffer[1] * this->_m_buffer[1];
+
+ /* Source and destination are the same buffer; the destination length passed is half the source length. */
+ math::MathUtils::ComplexMagnitudeSquaredF32(
+ this->_m_buffer.data(),
+ this->_m_buffer.size(),
+ this->_m_buffer.data(),
+ this->_m_buffer.size()/2);
+
+ this->_m_buffer[0] = firstEnergy;
+ this->_m_buffer[halfDim] = lastEnergy;
+ }
+
+ /**
+ * @brief Returns the normalisation factor applied to a filter's weights.
+ * This implementation ignores all arguments and performs no
+ * normalisation.
+ * @param[in] leftMel Lower mel limit of the filter (unused).
+ * @param[in] rightMel Upper mel limit of the filter (unused).
+ * @param[in] useHTKMethod Whether the HTK mel scale is used (unused).
+ * @return Always 1.
+ **/
+ float MelSpectrogram::GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ const bool useHTKMethod)
+ {
+ UNUSED(leftMel);
+ UNUSED(rightMel);
+ UNUSED(useHTKMethod);
+
+ /* By default, no normalisation => return 1 */
+ return 1.f;
+ }
+
+    /* Creates the mel filter bank on first use; later calls are no-ops. */
+    void MelSpectrogram::_InitMelFilterBank()
+    {
+        if (this->_IsMelFilterBankInited()) {
+            return;
+        }
+
+        this->_m_melFilterBank = this->_CreateMelFilterBank();
+        this->_m_filterBankInitialised = true;
+    }
+
+ /* Returns true once the mel filter bank has been created by _InitMelFilterBank(). */
+ bool MelSpectrogram::_IsMelFilterBankInited()
+ {
+ return this->_m_filterBankInitialised;
+ }
+
+    /**
+     * @brief       Computes the log mel energies for one frame of audio.
+     *
+     * Samples are scaled by 1/2^15, windowed, zero padded to the padded frame
+     * length and transformed with the FFT; the resulting spectrum is passed
+     * through the mel filter bank, converted to a logarithmic scale and
+     * offset by the training mean.
+     *
+     * @param[in]   audioData      Audio samples; only the first frame length
+     *                             samples are used. If fewer are supplied the
+     *                             missing ones are treated as silence.
+     * @param[in]   trainingMean   Value subtracted from every log mel energy.
+     * @return      Vector with one value per mel filter bank bin.
+     **/
+    std::vector<float> MelSpectrogram::ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean)
+    {
+        this->_InitMelFilterBank();
+
+        /* Never read past the end of the supplied audio. */
+        const size_t frameLen = this->_m_params.m_frameLen;
+        const size_t numSamples = (audioData.size() < frameLen) ? audioData.size() : frameLen;
+
+        /* TensorFlow way of normalizing .wav data to (-1, 1), followed by
+         * the window function. */
+        constexpr float normaliser = 1.0/(1<<15);
+        for (size_t i = 0; i < numSamples; ++i) {
+            const float sample = static_cast<float>(audioData[i]) * normaliser;
+            this->_m_frame[i] = sample * this->_m_windowFunc[i];
+        }
+
+        /* Set remaining frame values to 0 (missing samples and padding). */
+        std::fill(this->_m_frame.begin() + numSamples, this->_m_frame.end(), 0);
+
+        /* Compute FFT. */
+        math::MathUtils::FftF32(this->_m_frame, this->_m_buffer, this->_m_fftInstance);
+
+        /* Convert to power spectrum. */
+        this->_ConvertToPowerSpectrum();
+
+        /* Apply mel filterbanks. */
+        if (!this->ApplyMelFilterBank(this->_m_buffer,
+                                      this->_m_melFilterBank,
+                                      this->_m_filterBankFilterFirst,
+                                      this->_m_filterBankFilterLast,
+                                      this->_m_melEnergies)) {
+            printf_err("Failed to apply MEL filter banks\n");
+        }
+
+        /* Convert to logarithmic scale */
+        this->ConvertToLogarithmicScale(this->_m_melEnergies);
+
+        /* Perform mean subtraction. */
+        for (auto& energy:this->_m_melEnergies) {
+            energy -= trainingMean;
+        }
+
+        return this->_m_melEnergies;
+    }
+
+    /**
+     * @brief   Builds the triangular mel filter bank.
+     *
+     * The mel range between the low and high limits is divided into
+     * (number of bins + 1) equal steps; filter n rises from step n to step
+     * n + 1 and falls back to zero at step n + 2. For every filter only the
+     * weights of the FFT bins strictly inside that range are stored, and the
+     * first/last bin indexes are recorded in _m_filterBankFilterFirst and
+     * _m_filterBankFilterLast.
+     *
+     * A filter that no FFT bin falls into gets an empty weight vector and
+     * the empty range first = 0, last = -1, so that loops of the form
+     * "for (i = first; i <= last; ++i)" simply do not execute.
+     *
+     * @return  One weight vector per mel filter bank bin.
+     **/
+    std::vector<std::vector<float>> MelSpectrogram::_CreateMelFilterBank()
+    {
+        size_t numFftBins = this->_m_params.m_frameLenPadded / 2;
+        float fftBinWidth = static_cast<float>(this->_m_params.m_samplingFreq) / this->_m_params.m_frameLenPadded;
+
+        float melLowFreq = MelSpectrogram::MelScale(this->_m_params.m_melLoFreq,
+                                                    this->_m_params.m_useHtkMethod);
+        float melHighFreq = MelSpectrogram::MelScale(this->_m_params.m_melHiFreq,
+                                                     this->_m_params.m_useHtkMethod);
+        float melFreqDelta = (melHighFreq - melLowFreq) / (this->_m_params.m_numFbankBins + 1);
+
+        std::vector<float> thisBin = std::vector<float>(numFftBins);
+        std::vector<std::vector<float>> melFilterBank(
+                                            this->_m_params.m_numFbankBins);
+        this->_m_filterBankFilterFirst =
+                        std::vector<int32_t>(this->_m_params.m_numFbankBins);
+        this->_m_filterBankFilterLast =
+                        std::vector<int32_t>(this->_m_params.m_numFbankBins);
+
+        for (size_t bin = 0; bin < this->_m_params.m_numFbankBins; bin++) {
+            float leftMel = melLowFreq + bin * melFreqDelta;
+            float centerMel = melLowFreq + (bin + 1) * melFreqDelta;
+            float rightMel = melLowFreq + (bin + 2) * melFreqDelta;
+
+            int32_t firstIndex = -1;
+            int32_t lastIndex = -1;
+            const float normaliser = this->GetMelFilterBankNormaliser(leftMel, rightMel, this->_m_params.m_useHtkMethod);
+
+            for (size_t i = 0; i < numFftBins; ++i) {
+                float freq = (fftBinWidth * i);  /* Center freq of this fft bin. */
+                float mel = MelSpectrogram::MelScale(freq, this->_m_params.m_useHtkMethod);
+                thisBin[i] = 0.0;
+
+                if (mel > leftMel && mel < rightMel) {
+                    float weight;
+                    if (mel <= centerMel) {
+                        weight = (mel - leftMel) / (centerMel - leftMel);
+                    } else {
+                        weight = (rightMel - mel) / (rightMel - centerMel);
+                    }
+
+                    thisBin[i] = weight * normaliser;
+                    if (firstIndex == -1) {
+                        firstIndex = i;
+                    }
+                    lastIndex = i;
+                }
+            }
+
+            if (firstIndex < 0) {
+                /* No FFT bin lies inside this filter: record an empty range
+                 * instead of -1/-1, which would be used as an array index. */
+                this->_m_filterBankFilterFirst[bin] = 0;
+                this->_m_filterBankFilterLast[bin] = -1;
+                continue;
+            }
+
+            this->_m_filterBankFilterFirst[bin] = firstIndex;
+            this->_m_filterBankFilterLast[bin] = lastIndex;
+
+            /* Copy the part we care about. */
+            melFilterBank[bin].assign(thisBin.begin() + firstIndex,
+                                      thisBin.begin() + lastIndex + 1);
+        }
+
+        return melFilterBank;
+    }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/ad/src/UseCaseHandler.cc b/source/use_case/ad/src/UseCaseHandler.cc
new file mode 100644
index 0000000..c18a0a4
--- /dev/null
+++ b/source/use_case/ad/src/UseCaseHandler.cc
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "UseCaseHandler.hpp"
+
+#include "AdModel.hpp"
+#include "InputFiles.hpp"
+#include "Classifier.hpp"
+#include "hal.h"
+#include "AdMelSpectrogram.hpp"
+#include "AudioUtils.hpp"
+#include "UseCaseCommonUtils.hpp"
+#include "AdPostProcessing.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Helper function to increment current audio clip index
+ * @param[in,out] ctx reference to the application context object
+ **/
+ static void _IncrementAppCtxClipIdx(ApplicationContext& ctx);
+
+ /**
+ * @brief Helper function to set the audio clip index
+ * @param[in,out] ctx reference to the application context object
+ * @param[in] idx value to be set
+ * @return true if index is set, false otherwise
+ **/
+ static bool _SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx);
+
+ /**
+ * @brief Presents inference results using the data presentation
+ * object.
+ * @param[in] platform reference to the hal platform object
+ * @param[in] result average sum of classification results
+ * @param[in] threshold if the result is larger than this value we have an anomaly
+ * @return true if successful, false otherwise
+ **/
+ static bool _PresentInferenceResult(hal_platform& platform, float result, float threshold);
+
+ /**
+ * @brief Returns a function to perform feature calculation and populates input tensor data with
+ * Mel spectrogram data.
+ *
+ * Input tensor data type check is performed to choose correct feature data type.
+ * If tensor has an integer data type then original features are quantised.
+ *
+ * Warning: Mel spectrogram calculator provided as input must have the same life scope as returned function.
+ *
+ * @param[in] melSpec Mel spectrogram feature calculator.
+ * @param[in,out] inputTensor Input tensor pointer to store calculated features.
+ * @param[in] cacheSize Size of the feature vectors cache (number of feature vectors).
+ * @param[in] trainingMean Training mean passed on to the feature calculator.
+ * @return function to be called providing audio sample and sliding window index.
+ */
+ static std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
+ GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ float trainingMean);
+
+    /**
+     * @brief       Vibration classification handler: runs anomaly detection on
+     *              one audio clip, or on all of them.
+     *
+     * For each clip a window is slid over the audio; for every position the
+     * Mel spectrogram features are written to the input tensor, inference is
+     * run, and the negative softmax score of the output matching the machine
+     * id in the file name is accumulated. The average over all positions is
+     * stored in the context under "result" and presented to the user.
+     *
+     * @param[in,out]   ctx         Application context; reads "platform", "model",
+     *                              "frameLength", "frameStride", "scoreThreshold",
+     *                              "trainingMean" and "clipIndex", and updates
+     *                              "clipIndex" and "result".
+     * @param[in]       clipIndex   Clip to start from; values outside the valid
+     *                              range leave the current clip index unchanged.
+     * @param[in]       runAll      If true, continue until the clip index wraps
+     *                              back to the starting clip.
+     * @return          true if successful, false otherwise.
+     **/
+    bool ClassifyVibrationHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
+    {
+        auto& platform = ctx.Get<hal_platform&>("platform");
+
+        constexpr uint32_t dataPsnTxtInfStartX = 20;
+        constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+        platform.data_psn->clear(COLOR_BLACK);
+
+        auto& model = ctx.Get<Model&>("model");
+
+        /* If the request has a valid size, set the audio index */
+        if (clipIndex < NUMBER_OF_FILES) {
+            if (!_SetAppCtxClipIdx(ctx, clipIndex)) {
+                return false;
+            }
+        }
+        if (!model.IsInited()) {
+            printf_err("Model is not initialised! Terminating processing.\n");
+            return false;
+        }
+
+        const auto frameLength = ctx.Get<int>("frameLength");
+        const auto frameStride = ctx.Get<int>("frameStride");
+        const auto scoreThreshold = ctx.Get<float>("scoreThreshold");
+        const float trainingMean = ctx.Get<float>("trainingMean");
+        auto startClipIdx = ctx.Get<uint32_t>("clipIndex");
+
+        TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+        TfLiteTensor* inputTensor = model.GetInputTensor(0);
+
+        if (!inputTensor->dims) {
+            printf_err("Invalid input tensor dims\n");
+            return false;
+        }
+
+        /* data[1] and data[2] are read below, so at least three dimensions are required. */
+        TfLiteIntArray* inputShape = model.GetInputShape(0);
+        if (!inputShape || inputShape->size < 3) {
+            printf_err("Input tensor dimension should be >= 3\n");
+            return false;
+        }
+        const uint32_t kNumRows = inputShape->data[1];
+        const uint32_t kNumCols = inputShape->data[2];
+
+        audio::AdMelSpectrogram melSpec = audio::AdMelSpectrogram(frameLength);
+        melSpec.Init();
+
+        /* Deduce the data length required for 1 inference from the network parameters. */
+        const uint8_t inputResizeScale = 2;
+        const uint32_t audioDataWindowSize = (((inputResizeScale * kNumCols) - 1) * frameStride) + frameLength;
+
+        /* We are choosing to move by 20 frames across the audio for each inference. */
+        const uint8_t nMelSpecVectorsInAudioStride = 20;
+
+        auto audioDataStride = nMelSpecVectorsInAudioStride * frameStride;
+
+        do {
+            auto currentIndex = ctx.Get<uint32_t>("clipIndex");
+
+            /* Get the output index to look at based on id in the filename. */
+            int8_t machineOutputIndex = OutputIndexFromFileName(get_filename(currentIndex));
+            if (machineOutputIndex == -1) {
+                return false;
+            }
+
+            /* Creating a Mel Spectrogram sliding window for the data required for 1 inference.
+             * "resizing" done here by multiplying stride by resize scale. */
+            auto audioMelSpecWindowSlider = audio::SlidingWindow<const int16_t>(
+                    get_audio_array(currentIndex),
+                    audioDataWindowSize, frameLength,
+                    frameStride * inputResizeScale);
+
+            /* Creating a sliding window through the whole audio clip. */
+            auto audioDataSlider = audio::SlidingWindow<const int16_t>(
+                    get_audio_array(currentIndex),
+                    get_audio_array_size(currentIndex),
+                    audioDataWindowSize, audioDataStride);
+
+            /* Calculate number of the feature vectors in the window overlap region taking into account resizing.
+             * These feature vectors will be reused.*/
+            auto numberOfReusedFeatureVectors = kNumRows - (nMelSpecVectorsInAudioStride / inputResizeScale);
+
+            /* Construct feature calculation function. */
+            auto melSpecFeatureCalc = GetFeatureCalculator(melSpec, inputTensor,
+                                                           numberOfReusedFeatureVectors, trainingMean);
+            if (!melSpecFeatureCalc){
+                return false;
+            }
+
+            /* Result is an averaged sum over inferences. */
+            float result = 0;
+
+            /* Display message on the LCD - inference running. */
+            std::string str_inf{"Running inference... "};
+            platform.data_psn->present_data_text(
+                    str_inf.c_str(), str_inf.size(),
+                    dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+            info("Running inference on audio clip %u => %s\n", currentIndex, get_filename(currentIndex));
+
+            /* Start sliding through audio clip. */
+            while (audioDataSlider.HasNext()) {
+                const int16_t *inferenceWindow = audioDataSlider.Next();
+
+                /* We moved to the next window - set the features sliding to the new address. */
+                audioMelSpecWindowSlider.Reset(inferenceWindow);
+
+                /* The first window does not have cache ready. */
+                bool useCache = audioDataSlider.Index() > 0 && numberOfReusedFeatureVectors > 0;
+
+                /* Start calculating features inside one audio sliding window. */
+                while (audioMelSpecWindowSlider.HasNext()) {
+                    const int16_t *melSpecWindow = audioMelSpecWindowSlider.Next();
+                    std::vector<int16_t> melSpecAudioData = std::vector<int16_t>(melSpecWindow,
+                                                                                 melSpecWindow + frameLength);
+
+                    /* Compute features for this window and write them to input tensor. */
+                    melSpecFeatureCalc(melSpecAudioData, audioMelSpecWindowSlider.Index(),
+                                       useCache, nMelSpecVectorsInAudioStride, inputResizeScale);
+                }
+
+                info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
+                     audioDataSlider.TotalStrides() + 1);
+
+                /* Run inference over this audio clip sliding window */
+                arm::app::RunInference(platform, model);
+
+                /* Use the negative softmax score of the corresponding index as the outlier score */
+                std::vector<float> dequantOutput = Dequantize<int8_t>(outputTensor);
+                Softmax(dequantOutput);
+
+                /* The machine index comes from the file name, not from the model:
+                 * make sure the model actually has that many outputs. */
+                if (static_cast<size_t>(machineOutputIndex) >= dequantOutput.size()) {
+                    printf_err("Machine output index %d is out of range for the model output\n",
+                               machineOutputIndex);
+                    return false;
+                }
+                result += -dequantOutput[machineOutputIndex];
+
+#if VERIFY_TEST_OUTPUT
+                arm::app::DumpTensor(outputTensor);
+#endif /* VERIFY_TEST_OUTPUT */
+            } /* while (audioDataSlider.HasNext()) */
+
+            /* Use average over whole clip as final score. */
+            result /= (audioDataSlider.TotalStrides() + 1);
+
+            /* Erase. */
+            str_inf = std::string(str_inf.size(), ' ');
+            platform.data_psn->present_data_text(
+                    str_inf.c_str(), str_inf.size(),
+                    dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+            ctx.Set<float>("result", result);
+            if (!_PresentInferenceResult(platform, result, scoreThreshold)) {
+                return false;
+            }
+
+            _IncrementAppCtxClipIdx(ctx);
+
+        } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);
+
+        return true;
+    }
+
+    /* Advances "clipIndex" in the context by one, wrapping to 0 after the last file. */
+    static void _IncrementAppCtxClipIdx(ApplicationContext& ctx)
+    {
+        const auto nextIdx = ctx.Get<uint32_t>("clipIndex") + 1;
+        ctx.Set<uint32_t>("clipIndex", (nextIdx >= NUMBER_OF_FILES) ? 0 : nextIdx);
+    }
+
+    /* Stores idx as "clipIndex" in the context when it addresses an existing
+     * file; otherwise reports the error and leaves the context untouched. */
+    static bool _SetAppCtxClipIdx(ApplicationContext& ctx, const uint32_t idx)
+    {
+        const bool isValid = idx < NUMBER_OF_FILES;
+
+        if (isValid) {
+            ctx.Set<uint32_t>("clipIndex", idx);
+        } else {
+            printf_err("Invalid idx %u (expected less than %u)\n",
+                       idx, NUMBER_OF_FILES);
+        }
+        return isValid;
+    }
+
+    /* Builds the three line result message (score, threshold, verdict), shows
+     * it through the data presentation object and logs it. Always returns true. */
+    static bool _PresentInferenceResult(hal_platform& platform, float result, float threshold)
+    {
+        constexpr uint32_t dataPsnTxtStartX1 = 20;
+        constexpr uint32_t dataPsnTxtStartY1 = 30;
+        constexpr uint32_t dataPsnTxtYIncr   = 16;  /* Row index increment */
+
+        /* The message is placed two rows below the start position. */
+        const uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
+
+        platform.data_psn->set_text_color(COLOR_GREEN);
+
+        std::string resultStr{"Average anomaly score is: "};
+        resultStr += std::to_string(result);
+        resultStr += "\n";
+        resultStr += "Anomaly threshold is: ";
+        resultStr += std::to_string(threshold);
+        resultStr += "\n";
+        resultStr += (result > threshold)
+                        ? "Anomaly detected!"
+                        : "Everything fine, no anomaly detected!";
+
+        platform.data_psn->present_data_text(
+                resultStr.c_str(), resultStr.size(),
+                dataPsnTxtStartX1, rowIdx1, 0);
+
+        info("%s\n", resultStr.c_str());
+
+        return true;
+    }
+
+    /**
+     * @brief Generic feature calculator factory.
+     *
+     * Returns lambda function to compute features using features cache.
+     * Real features math is done by a lambda function provided as a parameter.
+     * Features are written to input tensor memory.
+     *
+     * The cache is a function-local static (one per feature type T) rather
+     * than a lambda capture, so it is shared by every lambda this factory
+     * returns for the same T. It is cleared and re-sized on every factory
+     * call so that it always matches the requested cacheSize.
+     *
+     * @tparam T            feature vector type.
+     * @param inputTensor   model input tensor pointer.
+     * @param cacheSize     number of feature vectors to cache. Defined by the sliding window overlap.
+     * @param compute       features calculator function.
+     * @return              lambda function to compute features.
+     */
+    template<class T>
+    std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
+    _FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+                 std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+    {
+        /* Feature cache used by the returned lambda function. */
+        static std::vector<std::vector<T>> featureCache;
+        featureCache.assign(cacheSize, std::vector<T>());
+
+        return [=](std::vector<int16_t>& audioDataWindow,
+                   size_t index,
+                   bool useCache,
+                   size_t featuresOverlapIndex,
+                   size_t resizeScale)
+        {
+            if (resizeScale == 0) {
+                printf_err("Invalid resize scale\n");
+                return;
+            }
+
+            T *tensorData = tflite::GetTensorData<T>(inputTensor);
+            std::vector<T> features;
+
+            /* Reuse features from cache if cache is ready and sliding windows overlap.
+             * Overlap is in the beginning of sliding window with a size of a feature cache. */
+            if (useCache && index < featureCache.size()) {
+                features = std::move(featureCache[index]);
+            } else {
+                features = compute(audioDataWindow);
+            }
+            const size_t size = features.size() / resizeScale;
+
+            /* Input should be transposed and "resized" by skipping elements. */
+            for (size_t outIndex = 0; outIndex < size; outIndex++) {
+                tensorData[(outIndex * size) + index] = features[outIndex * resizeScale];
+            }
+
+            /* Start renewing cache as soon iteration goes out of the windows overlap.
+             * The slot is bounds-checked so a short cache is never overrun. */
+            const size_t overlapStart = featuresOverlapIndex / resizeScale;
+            if (index >= overlapStart && (index - overlapStart) < featureCache.size()) {
+                featureCache[index - overlapStart] = std::move(features);
+            }
+        };
+    }
+
+ /* Explicit instantiations of _FeatureCalc for the feature element types
+  * int8_t, uint8_t, int16_t and float. */
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
+ _FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
+
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
+ _FeatureCalc<uint8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<uint8_t> (std::vector<int16_t>&)> compute);
+
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
+ _FeatureCalc<int16_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int16_t> (std::vector<int16_t>&)> compute);
+
+ template std::function<void(std::vector<int16_t>&, size_t, bool, size_t, size_t)>
+ _FeatureCalc<float>(TfLiteTensor *inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<float>(std::vector<int16_t>&)> compute);
+
+
+    /* Selects the feature calculator matching the input tensor: a quantising
+     * one (int8, uint8 or int16) when the tensor carries affine quantisation
+     * parameters, a float one otherwise. The returned function is empty when
+     * the tensor type is not supported or its quantisation parameters are
+     * missing; melSpec is captured by reference and must outlive it. */
+    static std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
+    GetFeatureCalculator(audio::AdMelSpectrogram& melSpec, TfLiteTensor* inputTensor, size_t cacheSize, float trainingMean)
+    {
+        std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)> melSpecFeatureCalc;
+
+        TfLiteQuantization quant = inputTensor->quantization;
+
+        if (kTfLiteAffineQuantization == quant.type) {
+
+            auto* quantParams = static_cast<TfLiteAffineQuantization*>(quant.params);
+            if (!quantParams || !quantParams->scale || !quantParams->zero_point) {
+                printf_err("Invalid quantisation parameters for input tensor\n");
+                return melSpecFeatureCalc;
+            }
+            const float quantScale = quantParams->scale->data[0];
+            const int quantOffset = quantParams->zero_point->data[0];
+
+            switch (inputTensor->type) {
+                case kTfLiteInt8: {
+                    melSpecFeatureCalc = _FeatureCalc<int8_t>(inputTensor,
+                                                              cacheSize,
+                                                              [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
+                                                                  return melSpec.MelSpecComputeQuant<int8_t>(audioDataWindow,
+                                                                                                             quantScale,
+                                                                                                             quantOffset,
+                                                                                                             trainingMean);
+                                                              }
+                    );
+                    break;
+                }
+                case kTfLiteUInt8: {
+                    melSpecFeatureCalc = _FeatureCalc<uint8_t>(inputTensor,
+                                                               cacheSize,
+                                                               [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
+                                                                   return melSpec.MelSpecComputeQuant<uint8_t>(audioDataWindow,
+                                                                                                               quantScale,
+                                                                                                               quantOffset,
+                                                                                                               trainingMean);
+                                                               }
+                    );
+                    break;
+                }
+                case kTfLiteInt16: {
+                    melSpecFeatureCalc = _FeatureCalc<int16_t>(inputTensor,
+                                                               cacheSize,
+                                                               [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
+                                                                   return melSpec.MelSpecComputeQuant<int16_t>(audioDataWindow,
+                                                                                                               quantScale,
+                                                                                                               quantOffset,
+                                                                                                               trainingMean);
+                                                               }
+                    );
+                    break;
+                }
+                default:
+                    /* Leave the calculator empty so the caller can detect the failure. */
+                    printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+                    break;
+            }
+
+        } else {
+            melSpecFeatureCalc = _FeatureCalc<float>(inputTensor,
+                                                     cacheSize,
+                                                     [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
+                                                         return melSpec.ComputeMelSpec(audioDataWindow,
+                                                                                       trainingMean);
+                                                     });
+        }
+        return melSpecFeatureCalc;
+    }
+
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/ad/usecase.cmake b/source/use_case/ad/usecase.cmake
new file mode 100644
index 0000000..46e4101
--- /dev/null
+++ b/source/use_case/ad/usecase.cmake
@@ -0,0 +1,111 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+
+# If the path to a directory or source file has been defined,
+# get the type here (FILEPATH or PATH):
+if (DEFINED ${use_case}_FILE_PATH)
+    get_path_type("${${use_case}_FILE_PATH}" PATH_TYPE)
+
+    # Abort if the path is neither a directory nor a file path. The operands
+    # are quoted so that an empty PATH_TYPE does not break the if() syntax.
+    if (NOT "${PATH_TYPE}" STREQUAL "PATH" AND NOT "${PATH_TYPE}" STREQUAL "FILEPATH")
+        message(FATAL_ERROR "Invalid ${use_case}_FILE_PATH. It should be a dir or file path.")
+    endif()
+else()
+    # Default is a directory path
+    set(PATH_TYPE PATH)
+endif()
+
+message(STATUS "${use_case}_FILE_PATH is of type: ${PATH_TYPE}")
+
+# Location of the input audio: a directory of WAV files or a single WAV file.
+USER_OPTION(${use_case}_FILE_PATH "Directory with custom WAV input files, or path to a single input WAV file, to use in the evaluation application."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
+ ${PATH_TYPE})
+
+# Audio pre-processing options; all of them are passed to generate_audio_code() below.
+USER_OPTION(${use_case}_AUDIO_RATE "Specify the target sampling rate. Default is 16000."
+ 16000
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MONO "Specify if the audio needs to be converted to mono. Default is ON."
+ ON
+ BOOL)
+
+USER_OPTION(${use_case}_AUDIO_OFFSET "Specify the offset to start reading after this time (in seconds). Default is 0."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_DURATION "Specify the audio duration to load (in seconds). If set to 0 the entire audio will be processed."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_RES_TYPE "Specify re-sampling algorithm to use. By default is 'kaiser_best'."
+ kaiser_best
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MIN_SAMPLES "Specify the minimum number of samples to use. By default is amount needed to do one inference,
+ if the audio is shorter then it will be automatically padded."
+ 33280
+ STRING)
+
+# Threshold emitted into the generated model code as g_ScoreThreshold (see EXTRA_MODEL_CODE).
+USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD "Specify the score threshold for a result to be deemed anomalous."
+ -0.8
+ STRING)
+
+# Generate the audio sources and headers from the input files using the options above.
+generate_audio_code(${${use_case}_FILE_PATH} ${SRC_GEN_DIR} ${INC_GEN_DIR}
+ ${${use_case}_AUDIO_RATE}
+ ${${use_case}_AUDIO_MONO}
+ ${${use_case}_AUDIO_OFFSET}
+ ${${use_case}_AUDIO_DURATION}
+ ${${use_case}_AUDIO_RES_TYPE}
+ ${${use_case}_AUDIO_MIN_SAMPLES})
+
+USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
+ 0x00200000
+ STRING)
+
+# If there is no tflite file pointed to
+if (NOT DEFINED ${use_case}_MODEL_TFLITE_PATH)
+
+ set(MODEL_RESOURCES_DIR ${DOWNLOAD_DEP_DIR}/${use_case})
+ file(MAKE_DIRECTORY ${MODEL_RESOURCES_DIR})
+ set(MODEL_FILENAME ad_med_nov11_int8.tflite)
+ set(DEFAULT_MODEL_PATH ${MODEL_RESOURCES_DIR}/${MODEL_FILENAME})
+
+ # TODO: Download the model here for this use case when available on Model Zoo.
+ # For now we write a place holder file.
+ file(WRITE ${DEFAULT_MODEL_PATH} "Placeholder")
+else()
+ # A model path was supplied, so no default model is needed.
+ set(DEFAULT_MODEL_PATH "N/A")
+endif()
+
+# Extra declarations handed to generate_tflite_code() as EXPRESSIONS; they define
+# the g_* constants that the use case's MainLoop.cc stores in the application context.
+set(EXTRA_MODEL_CODE
+ "/* Model parameters for ${use_case} */"
+ "extern const int g_FrameLength = 1024"
+ "extern const int g_FrameStride = 512"
+ "extern const float g_ScoreThreshold = ${${use_case}_MODEL_SCORE_THRESHOLD}"
+ "extern const float g_TrainingMean = -30"
+ )
+
+USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the evaluation application. Model files must be in tflite format."
+ ${DEFAULT_MODEL_PATH}
+ FILEPATH)
+
+# Generate model file
+generate_tflite_code(
+ MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
+ DESTINATION ${SRC_GEN_DIR}
+ EXPRESSIONS ${EXTRA_MODEL_CODE}
+)
diff --git a/source/use_case/asr/include/AsrClassifier.hpp b/source/use_case/asr/include/AsrClassifier.hpp
new file mode 100644
index 0000000..1a63814
--- /dev/null
+++ b/source/use_case/asr/include/AsrClassifier.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_CLASSIFIER_HPP
+#define ASR_CLASSIFIER_HPP
+
+#include "Classifier.hpp"
+
+namespace arm {
+namespace app {
+
+ /* Classifier specialisation for ASR output tensors. */
+ class AsrClassifier : public Classifier {
+ public:
+ /**
+ * @brief Gets the classification results from the output tensor.
+ * This implementation returns only the top result for
+ * each output row; a topNCount other than 1 is ignored
+ * with a warning.
+ * @param[in] outputTensor Inference output tensor from an NN model.
+ * @param[out] vecResults A vector of classification results
+ * populated by this function.
+ * @param[in] labels Labels vector to match classified classes
+ * @param[in] topNCount Number of top classifications to pick.
+ * @return true if successful, false otherwise.
+ **/
+ bool GetClassificationResults(
+ TfLiteTensor* outputTensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, uint32_t topNCount) override;
+
+ private:
+ /**
+ * @brief Utility function that gets the top 1 classification results from the
+ * output tensor (vector of vector).
+ * @tparam T Element type of the tensor data; instantiated
+ * for uint8_t and int8_t in the source file.
+ * @param[in] tensor Inference output tensor from an NN model.
+ * @param[out] vecResults Vector of classification results populated by this function.
+ * @param[in] labels Labels vector to match classified classes.
+ * @param[in] scale Quantization scale.
+ * @param[in] zeroPoint Quantization zero point.
+ * @return true if successful, false otherwise.
+ **/
+ template<typename T>
+ bool _GetTopResults(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, double scale, double zeroPoint);
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_CLASSIFIER_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/AsrResult.hpp b/source/use_case/asr/include/AsrResult.hpp
new file mode 100644
index 0000000..b12ed7d
--- /dev/null
+++ b/source/use_case/asr/include/AsrResult.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_RESULT_HPP
+#define ASR_RESULT_HPP
+
+#include "ClassificationResult.hpp"
+
+#include <vector>
+
+namespace arm {
+namespace app {
+namespace asr {
+
+ using ResultVec = std::vector < arm::app::ClassificationResult >;
+
+    /* Structure for holding ASR result: the classification results of one
+     * inference that meet a score threshold, plus the metadata for that
+     * inference. */
+    class AsrResult {
+
+    public:
+        ResultVec m_resultVec;        /* Container for "thresholded" classification results. */
+        float m_timeStamp;            /* Audio timestamp for this result. */
+        uint32_t m_inferenceNumber;   /* Corresponding inference number. */
+        float m_threshold;            /* Threshold value for `m_resultVec.` */
+
+        AsrResult() = delete;
+
+        /**
+         * @brief       Constructor; copies every result whose normalised
+         *              value is greater than or equal to the threshold.
+         * @param[in]   resultVec        Classification results to filter
+         *                               (read only, not modified).
+         * @param[in]   timestamp        Audio timestamp for this result.
+         * @param[in]   inferenceIdx     Corresponding inference number.
+         * @param[in]   scoreThreshold   Minimum normalised value a result
+         *                               needs in order to be kept.
+         **/
+        AsrResult(const ResultVec& resultVec,
+                  const float timestamp,
+                  const uint32_t inferenceIdx,
+                  const float scoreThreshold) :
+            m_resultVec(),
+            m_timeStamp(timestamp),
+            m_inferenceNumber(inferenceIdx),
+            m_threshold(scoreThreshold)
+        {
+            for (const auto& result : resultVec) {
+                if (result.m_normalisedVal >= this->m_threshold) {
+                    this->m_resultVec.emplace_back(result);
+                }
+            }
+        }
+        ~AsrResult() = default;
+    };
+
+} /* namespace asr */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/OutputDecode.hpp b/source/use_case/asr/include/OutputDecode.hpp
new file mode 100644
index 0000000..6095531
--- /dev/null
+++ b/source/use_case/asr/include/OutputDecode.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_OUTPUT_DECODE_HPP
+#define ASR_OUTPUT_DECODE_HPP
+
+#include "AsrClassifier.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+ * @brief Decodes the classification results into an output string.
+ * @param[in] vecResults Vector of classification results to decode.
+ * @return Decoded output as a string.
+ **/
+ std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults);
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_OUTPUT_DECODE_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/UseCaseHandler.hpp b/source/use_case/asr/include/UseCaseHandler.hpp
new file mode 100644
index 0000000..75052c7
--- /dev/null
+++ b/source/use_case/asr/include/UseCaseHandler.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_EVT_HANDLER_HPP
+#define ASR_EVT_HANDLER_HPP
+
+#include "AppContext.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Handles the inference event.
+ * @param[in] ctx Reference to the application context.
+ * @param[in] clipIndex Index to the audio clip to classify.
+ * @param[in] runAll Flag to request classification of all the available audio clips.
+ * @return true or false based on execution success.
+ **/
+ bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_EVT_HANDLER_HPP */
diff --git a/source/use_case/asr/include/Wav2LetterMfcc.hpp b/source/use_case/asr/include/Wav2LetterMfcc.hpp
new file mode 100644
index 0000000..3cb43b9
--- /dev/null
+++ b/source/use_case/asr/include/Wav2LetterMfcc.hpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_MFCC_HPP
+#define ASR_WAV2LETTER_MFCC_HPP
+
+#include "Mfcc.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* Class to provide Wav2Letter specific MFCC calculation requirements. */
+ class Wav2LetterMFCC : public MFCC {
+
+ public:
+ static constexpr uint32_t ms_defaultSamplingFreq = 16000;
+ static constexpr uint32_t ms_defaultNumFbankBins = 128;
+ static constexpr uint32_t ms_defaultMelLoFreq = 0;
+ static constexpr uint32_t ms_defaultMelHiFreq = 8000;
+ static constexpr bool ms_defaultUseHtkMethod = false;
+
+ /**
+ * @brief Constructor. Builds the base MFCC object from the class
+ * defaults (sampling frequency, filter bank bins, Mel
+ * low/high frequency, HTK flag) and the two arguments.
+ * @param[in] numFeats Forwarded to MfccParams.
+ * NOTE(review): presumably the number of MFCC features - confirm.
+ * @param[in] frameLen Forwarded to MfccParams.
+ * NOTE(review): presumably the frame length in samples - confirm.
+ */
+ explicit Wav2LetterMFCC(const size_t numFeats, const size_t frameLen)
+ : MFCC(MfccParams(
+ ms_defaultSamplingFreq, ms_defaultNumFbankBins,
+ ms_defaultMelLoFreq, ms_defaultMelHiFreq,
+ numFeats, frameLen, ms_defaultUseHtkMethod))
+ {}
+
+ Wav2LetterMFCC() = delete;
+ ~Wav2LetterMFCC() = default;
+
+ protected:
+
+ /**
+ * @brief Overrides base class implementation of this function.
+ * @param[in] fftVec Vector populated with FFT magnitudes
+ * @param[in] melFilterBank 2D Vector with filter bank weights
+ * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
+ * to be used for each bin.
+ * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
+ * to be used for each bin.
+ * @param[out] melEnergies Pre-allocated vector of MEL energies to be
+ * populated.
+ * @return true if successful, false otherwise
+ */
+ bool ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<int32_t>& filterBankFilterFirst,
+ std::vector<int32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies) override;
+
+ /**
+ * @brief Override for the base class implementation convert mel
+ * energies to logarithmic scale. The difference from
+ * default behaviour is that the power is converted to dB
+ * and subsequently clamped.
+ * @param[in,out] melEnergies 1D vector of Mel energies
+ **/
+ void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override;
+
+ /**
+ * @brief Create a matrix used to calculate Discrete Cosine
+ * Transform. Override for the base class' default
+ * implementation as the first and last elements
+ * use a different normaliser.
+ * @param[in] inputLength input length of the buffer on which
+ * DCT will be performed
+ * @param[in] coefficientCount Total coefficients per input length.
+ * @return 1D vector with inputLength x coefficientCount elements
+ * populated with DCT coefficients.
+ */
+ std::vector<float> CreateDCTMatrix(int32_t inputLength,
+ int32_t coefficientCount) override;
+
+ /**
+ * @brief Given the low and high Mel values, get the normaliser
+ * for weights to be applied when populating the filter
+ * bank. Override for the base class implementation.
+ * @param[in] leftMel Low Mel frequency value.
+ * @param[in] rightMel High Mel frequency value.
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation.
+ * @return Value to use for normalising.
+ */
+ float GetMelFilterBankNormaliser(const float& leftMel,
+ const float& rightMel,
+ bool useHTKMethod) override;
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_MFCC_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/Wav2LetterModel.hpp b/source/use_case/asr/include/Wav2LetterModel.hpp
new file mode 100644
index 0000000..b801e10
--- /dev/null
+++ b/source/use_case/asr/include/Wav2LetterModel.hpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_MODEL_HPP
+#define ASR_WAV2LETTER_MODEL_HPP
+
+#include "Model.hpp"
+
+extern const int g_FrameLength;
+extern const int g_FrameStride;
+extern const float g_ScoreThreshold;
+extern const int g_ctxLen;
+
+namespace arm {
+namespace app {
+
+ /* Model class for Wav2Letter: declares the tensor shape indices and the
+ * op resolver used for this model. */
+ class Wav2LetterModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input and output tensor shapes */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ /* The output indices are used by AsrClassifier to read the output tensor dimensions. */
+ static constexpr uint32_t ms_outputRowsIdx = 2;
+ static constexpr uint32_t ms_outputColsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ /* NOTE(review): presumably returns the pointer to the model data; defined in the source file - confirm. */
+ const uint8_t* ModelPointer() override;
+
+ /* NOTE(review): presumably returns the model data size in bytes; defined in the source file - confirm. */
+ size_t ModelSize() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int _ms_maxOpCnt = 5;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<_ms_maxOpCnt> _m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_MODEL_HPP */
diff --git a/source/use_case/asr/include/Wav2LetterPostprocess.hpp b/source/use_case/asr/include/Wav2LetterPostprocess.hpp
new file mode 100644
index 0000000..69567a3
--- /dev/null
+++ b/source/use_case/asr/include/Wav2LetterPostprocess.hpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_POSTPROCESS_HPP
+#define ASR_WAV2LETTER_POSTPROCESS_HPP
+
+#include "TensorFlowLiteMicro.hpp" /* TensorFlow headers. */
+#include "hal.h" /* stdout facility. */
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+ * @brief Helper class to manage tensor post-processing for "wav2letter"
+ * output.
+ */
+ class Postprocess {
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] contextLen Left and right context length for
+ * output tensor.
+ * @param[in] innerLen This is the length of the section
+ * between left and right context.
+ * @param[in] blankTokenIdx Index of the labels blank token.
+ **/
+ Postprocess(uint32_t contextLen,
+ uint32_t innerLen,
+ uint32_t blankTokenIdx);
+
+ Postprocess() = delete;
+ ~Postprocess() = default;
+
+ /**
+ * @brief Erases the required part of the tensor based
+ * on context lengths set up during initialisation.
+ * @param[in] tensor Pointer to the tensor.
+ * @param[in] axisIdx Index of the axis on which erase is
+ * performed.
+ * @param[in] lastIteration Flag to signal this is the
+ * last iteration in which case
+ * the right context is preserved.
+ * @return true if successful, false otherwise.
+ */
+ bool Invoke(TfLiteTensor* tensor,
+ uint32_t axisIdx,
+ bool lastIteration = false);
+
+ private:
+ uint32_t _m_contextLen; /* lengths of left and right contexts. */
+ uint32_t _m_innerLen; /* Length of inner context. */
+ uint32_t _m_totalLen; /* Total length of the required axis. */
+ uint32_t _m_countIterations; /* Current number of iterations. */
+ uint32_t _m_blankTokenIdx; /* Index of the labels blank token. */
+ /**
+ * @brief Checks if the tensor and axis index are valid
+ * inputs to the object - based on how it has been
+ * initialised.
+ * @return true if valid, false otherwise.
+ */
+ bool _IsInputValid(TfLiteTensor* tensor,
+ uint32_t axisIdx) const;
+
+ /**
+ * @brief Gets the tensor data element size in bytes based
+ * on the tensor type.
+ * @return Size in bytes, 0 if not supported.
+ */
+ uint32_t _GetTensorElementSize(TfLiteTensor* tensor);
+
+ /**
+ * @brief Erases sections from the data assuming row-wise
+ * arrangement along the context axis.
+ * @return true if successful, false otherwise.
+ */
+ bool _EraseSectionsRowWise(uint8_t* ptrData,
+ uint32_t strideSzBytes,
+ bool lastIteration);
+
+ /**
+ * @brief Erases sections from the data assuming col-wise
+ * arrangement along the context axis.
+ * @return true if successful, false otherwise.
+ */
+ bool _EraseSectionsColWise(uint8_t* ptrData,
+ uint32_t strideSzBytes,
+ bool lastIteration);
+ };
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_POSTPROCESS_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/Wav2LetterPreprocess.hpp b/source/use_case/asr/include/Wav2LetterPreprocess.hpp
new file mode 100644
index 0000000..8a4e0b7
--- /dev/null
+++ b/source/use_case/asr/include/Wav2LetterPreprocess.hpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_PREPROCESS_HPP
+#define ASR_WAV2LETTER_PREPROCESS_HPP
+
+#include "Wav2LetterModel.hpp"
+#include "Wav2LetterMfcc.hpp"
+#include "AudioUtils.hpp"
+#include "DataStructures.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /* Class to facilitate pre-processing calculation for Wav2Letter model
+ * for ASR. */
+ using AudioWindow = SlidingWindow <const int16_t>;
+
+ class Preprocess {
+ public:
+ /**
+ * @brief Constructor.
+ * @param[in] numMfccFeatures Number of MFCC features per window.
+ * @param[in] windowLen Number of elements in a window.
+ * @param[in] windowStride Stride (in number of elements) for
+ * moving the window.
+ * @param[in] numMfccVectors Number of MFCC vectors per window.
+ */
+ Preprocess(
+ uint32_t numMfccFeatures,
+ uint32_t windowLen,
+ uint32_t windowStride,
+ uint32_t numMfccVectors);
+ Preprocess() = delete;
+ ~Preprocess() = default;
+
+ /**
+ * @brief Calculates the features required from audio data. This
+ * includes MFCC, first and second order deltas,
+ * normalisation and finally, quantisation. The tensor is
+ * populated with feature from a given window placed along
+ * in a single row.
+ * @param[in] audioData Pointer to the first element of audio data.
+ * @param[in] audioDataLen Number of elements in the audio data.
+ * @param[in] tensor Tensor to be populated.
+ * @return true if successful, false in case of error.
+ */
+ bool Invoke(const int16_t * audioData,
+ uint32_t audioDataLen,
+ TfLiteTensor * tensor);
+
+ protected:
+ /**
+ * @brief Computes the first and second order deltas for the
+ * MFCC buffers - they are assumed to be populated.
+ *
+ * @param[in] mfcc MFCC buffers.
+ * @param[out] delta1 Result of the first diff computation.
+ * @param[out] delta2 Result of the second diff computation.
+ * @return true if successful, false otherwise.
+ */
+ static bool _ComputeDeltas(Array2d<float>& mfcc,
+ Array2d<float>& delta1,
+ Array2d<float>& delta2);
+
+ /**
+ * @brief Given a 2D vector of floats, computes the mean.
+ * @param[in] vec Vector of vector of floats.
+ * @return Mean value.
+ */
+ static float _GetMean(Array2d<float>& vec);
+
+ /**
+ * @brief Given a 2D vector of floats, computes the stddev.
+ * @param[in] vec Vector of vector of floats.
+ * @param[in] mean Mean value of the vector passed in.
+ * @return stddev value.
+ */
+ static float _GetStdDev(Array2d<float>& vec,
+ float mean);
+
+ /**
+ * @brief Given a 2D vector of floats, normalises it using
+ * the mean and the stddev.
+ * @param[in,out] vec Vector of vector of floats.
+ */
+ static void _NormaliseVec(Array2d<float>& vec);
+
+ /**
+ * @brief Normalises the MFCC and delta buffers.
+ */
+ void _Normalise();
+
+ /**
+ * @brief Given the quantisation and data type limits, computes
+ * the quantised values of a floating point input data.
+ * @param[in] elem Element to be quantised.
+ * @param[in] quantScale Scale.
+ * @param[in] quantOffset Offset.
+ * @param[in] minVal Numerical limit - minimum.
+ * @param[in] maxVal Numerical limit - maximum.
+ * @return Floating point quantised value.
+ */
+ static float _GetQuantElem(
+ float elem,
+ float quantScale,
+ int quantOffset,
+ float minVal,
+ float maxVal);
+
+        /**
+         * @brief       Quantises the MFCC and delta buffers, and places them
+         *              in the output buffer. While doing so, it transposes
+         *              the data. Reason: Buffers in this class are arranged
+         *              for "time" axis to be row major. Primary reason for
+         *              this being the convolution speed up (as we can use
+         *              contiguous memory). The output, however, requires the
+         *              time axis to be in column major arrangement.
+         * @tparam      T             Element type of the output buffer.
+         * @param[in]   outputBuf     Pointer to the output buffer.
+         * @param[in]   outputBufSz   Output buffer's size in bytes.
+         * @param[in]   quantScale    Quantisation scale.
+         * @param[in]   quantOffset   Quantisation offset.
+         * @return      true if the output buffer was populated, false if it
+         *              is too small to hold all the features.
+         */
+        template <typename T>
+        bool _Quantise(
+                T * outputBuf,
+                const uint32_t outputBufSz,
+                const float quantScale,
+                const int quantOffset)
+        {
+            /* Check the output size will fit everything: the loops below
+             * write three values (MFCC, delta 1, delta 2) for each feature
+             * of every feature vector. */
+            const size_t requiredBytes =
+                static_cast<size_t>(this->_m_numFeatVectors) *
+                this->_m_numMfccFeats * 3 * sizeof(T);
+            if (outputBufSz < requiredBytes) {
+                printf_err("Tensor size too small for features\n");
+                return false;
+            }
+
+            /* Populate. */
+            T * outputBufMfcc = outputBuf;
+            T * outputBufD1 = outputBuf + this->_m_numMfccFeats;
+            T * outputBufD2 = outputBufD1 + this->_m_numMfccFeats;
+            const uint32_t ptrIncr = this->_m_numMfccFeats * 2;  /* (3 vectors - 1 vector) */
+
+            /* lowest() is the most negative value for every arithmetic type;
+             * for the integer types it is identical to min(). */
+            const float minVal = std::numeric_limits<T>::lowest();
+            const float maxVal = std::numeric_limits<T>::max();
+
+            /* Need to transpose while copying and concatenating the tensor. */
+            for (uint32_t j = 0; j < this->_m_numFeatVectors; ++j) {
+                for (uint32_t i = 0; i < this->_m_numMfccFeats; ++i) {
+                    *outputBufMfcc++ = static_cast<T>(Preprocess::_GetQuantElem(
+                                        this->_m_mfccBuf(i, j), quantScale,
+                                        quantOffset, minVal, maxVal));
+                    *outputBufD1++ = static_cast<T>(Preprocess::_GetQuantElem(
+                                        this->_m_delta1Buf(i, j), quantScale,
+                                        quantOffset, minVal, maxVal));
+                    *outputBufD2++ = static_cast<T>(Preprocess::_GetQuantElem(
+                                        this->_m_delta2Buf(i, j), quantScale,
+                                        quantOffset, minVal, maxVal));
+                }
+                /* Skip over the two sections just written by the other pointers. */
+                outputBufMfcc += ptrIncr;
+                outputBufD1 += ptrIncr;
+                outputBufD2 += ptrIncr;
+            }
+
+            return true;
+        }
+
+ private:
+ Wav2LetterMFCC _m_mfcc; /* MFCC instance. */
+
+ /* Actual buffers to be populated. */
+ Array2d<float> _m_mfccBuf; /* Contiguous buffer 1D: MFCC */
+ Array2d<float> _m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
+ Array2d<float> _m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
+
+ uint32_t _m_windowLen; /* Window length for MFCC. */
+ uint32_t _m_windowStride; /* Window stride len for MFCC. */
+ uint32_t _m_numMfccFeats; /* Number of MFCC features per window. */
+ uint32_t _m_numFeatVectors; /* Number of _m_numMfccFeats. */
+ AudioWindow _m_window; /* Sliding window. */
+
+ };
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_PREPROCESS_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/src/AsrClassifier.cc b/source/use_case/asr/src/AsrClassifier.cc
new file mode 100644
index 0000000..7377d30
--- /dev/null
+++ b/source/use_case/asr/src/AsrClassifier.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AsrClassifier.hpp"
+
+#include "hal.h"
+#include "TensorFlowLiteMicro.hpp"
+#include "Wav2LetterModel.hpp"
+
+/* Picks the highest scoring label for each row of the output tensor and
+ * stores its de-quantised score, label and label index in vecResults.
+ * The label count is expected to have been validated by the caller. */
+template<typename T>
+bool arm::app::AsrClassifier::_GetTopResults(TfLiteTensor* tensor,
+                                std::vector<ClassificationResult>& vecResults,
+                                const std::vector <std::string>& labels, double scale, double zeroPoint)
+{
+    const uint32_t numRows = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx];
+    const uint32_t numCols = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx];
+
+    /* NOTE: tensor's size verification against labels should be
+     *       checked by the calling/public function. */
+    if (numCols < 1) {
+        return false;
+    }
+
+    /* One result per row. */
+    vecResults = std::vector<ClassificationResult>(numRows);
+
+    T* tensorData = tflite::GetTensorData<T>(tensor);
+
+    for (uint32_t rowIdx = 0; rowIdx < numRows; ++rowIdx) {
+        const uint32_t rowStart = rowIdx * numCols;
+
+        /* Start with the first column as the best candidate. */
+        T bestVal = tensorData[rowStart + 0];
+        uint32_t bestIdx = 0;
+
+        for (uint32_t col = 1; col < numCols; ++col) {
+            if (bestVal < tensorData[rowStart + col]) {
+                bestVal = tensorData[rowStart + col];
+                bestIdx = col;
+            }
+        }
+
+        const double score = static_cast<int> (bestVal);
+        ClassificationResult& result = vecResults[rowIdx];
+        result.m_normalisedVal = scale * (score - zeroPoint);
+        result.m_label = labels[bestIdx];
+        result.m_labelIdx = bestIdx;
+    }
+
+    return true;
+}
+template bool arm::app::AsrClassifier::_GetTopResults<uint8_t>(TfLiteTensor* tensor,
+                                std::vector<ClassificationResult>& vecResults,
+                                const std::vector <std::string>& labels, double scale, double zeroPoint);
+template bool arm::app::AsrClassifier::_GetTopResults<int8_t>(TfLiteTensor* tensor,
+                                std::vector<ClassificationResult>& vecResults,
+                                const std::vector <std::string>& labels, double scale, double zeroPoint);
+
+/* Validates the output tensor against the labels and fills vecResults with
+ * the top result of each output row. Returns false if the tensor is null,
+ * has too few dimensions, does not match the labels' size, or has an
+ * unsupported type. A topNCount other than 1 is ignored with a warning. */
+bool arm::app::AsrClassifier::GetClassificationResults(
+            TfLiteTensor* outputTensor,
+            std::vector<ClassificationResult>& vecResults,
+            const std::vector <std::string>& labels, uint32_t topNCount)
+{
+    vecResults.clear();
+
+    /* Highest axis index that is read from the tensor's shape. */
+    constexpr uint32_t maxAxisIdx =
+        (arm::app::Wav2LetterModel::ms_outputRowsIdx > arm::app::Wav2LetterModel::ms_outputColsIdx)?
+        arm::app::Wav2LetterModel::ms_outputRowsIdx : arm::app::Wav2LetterModel::ms_outputColsIdx;
+
+    /* Indexing dims->data[maxAxisIdx] needs at least maxAxisIdx + 1 dimensions. */
+    constexpr int minTensorDims = static_cast<int>(maxAxisIdx) + 1;
+
+    constexpr uint32_t outColsIdx = arm::app::Wav2LetterModel::ms_outputColsIdx;
+
+    /* Sanity checks. */
+    if (outputTensor == nullptr) {
+        printf_err("Output vector is null pointer.\n");
+        return false;
+    } else if (outputTensor->dims->size < minTensorDims) {
+        printf_err("Output tensor expected to be %dD\n", minTensorDims);
+        return false;
+    } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
+        printf_err("Output vectors are smaller than %u\n", topNCount);
+        return false;
+    } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
+        printf_err("Output size doesn't match the labels' size\n");
+        return false;
+    }
+
+    if (topNCount != 1) {
+        warn("TopNCount value ignored in this implementation\n");
+    }
+
+    /* To return the floating point values, we need quantization parameters. */
+    QuantParams quantParams = GetTensorQuantParams(outputTensor);
+
+    bool resultState = false;
+
+    switch (outputTensor->type) {
+        case kTfLiteUInt8:
+            resultState = this->_GetTopResults<uint8_t>(
+                            outputTensor, vecResults,
+                            labels, quantParams.scale,
+                            quantParams.offset);
+            break;
+        case kTfLiteInt8:
+            resultState = this->_GetTopResults<int8_t>(
+                            outputTensor, vecResults,
+                            labels, quantParams.scale,
+                            quantParams.offset);
+            break;
+        default:
+            printf_err("Tensor type %s not supported by classifier\n",
+                TfLiteTypeGetName(outputTensor->type));
+            return false;
+    }
+
+    if (!resultState) {
+        printf_err("Failed to get sorted set\n");
+        return false;
+    }
+
+    return true;
+} \ No newline at end of file
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc
new file mode 100644
index 0000000..ca777be
--- /dev/null
+++ b/source/use_case/asr/src/MainLoop.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hal.h" /* Brings in platform definitions. */
+#include "Labels.hpp" /* For label strings. */
+#include "UseCaseHandler.hpp" /* Handlers for different user options. */
+#include "Wav2LetterModel.hpp" /* Model class for running inference. */
+#include "UseCaseCommonUtils.hpp" /* Utils functions. */
+#include "AsrClassifier.hpp" /* Classifier. */
+#include "InputFiles.hpp" /* Generated audio clip header. */
+#include "Wav2LetterPreprocess.hpp" /* Pre-processing class. */
+#include "Wav2LetterPostprocess.hpp" /* Post-processing class. */
+
+/* Menu option codes. These are the numbers DisplayMenu() prints next to each
+ * entry and the values main_loop() switches on after reading the user input. */
+enum opcodes
+{
+ MENU_OPT_RUN_INF_NEXT = 1, /* Run on next vector. */
+ MENU_OPT_RUN_INF_CHOSEN, /* Run on a user provided vector index. */
+ MENU_OPT_RUN_INF_ALL, /* Run inference on all. */
+ MENU_OPT_SHOW_MODEL_INFO, /* Show model info. */
+ MENU_OPT_LIST_AUDIO_CLIPS /* List the current baked audio clips. */
+};
+
+/** @brief Prints the interactive menu, labelling each entry with its opcode value. */
+static void DisplayMenu()
+{
+    /* Banner. */
+    printf("\n\nUser input required\n"
+           "Enter option number from:\n\n");
+
+    /* One numbered line per selectable option. */
+    printf(" %u. Classify next audio clip\n"
+           " %u. Classify audio clip at chosen index\n"
+           " %u. Run classification on all audio clips\n"
+           " %u. Show NN model info\n"
+           " %u. List audio clips\n\n",
+           MENU_OPT_RUN_INF_NEXT,
+           MENU_OPT_RUN_INF_CHOSEN,
+           MENU_OPT_RUN_INF_ALL,
+           MENU_OPT_SHOW_MODEL_INFO,
+           MENU_OPT_LIST_AUDIO_CLIPS);
+
+    /* Prompt; the caller reads the answer. */
+    printf(" Choice: ");
+}
+
+/** @brief Verify input and output tensor are of certain min dimensions. */
+static bool VerifyTensorDimensions(const arm::app::Model& model);
+
+/** @brief Gets the number of MFCC features for a single window. */
+static uint32_t GetNumMfccFeatures(const arm::app::Model& model);
+
+/** @brief Gets the number of MFCC feature vectors to be computed. */
+static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);
+
+/** @brief Gets the output context length (left and right) for post-processing. */
+static uint32_t GetOutputContextLen(const arm::app::Model& model,
+ uint32_t inputCtxLen);
+
+/** @brief Gets the output inner length for post-processing. */
+static uint32_t GetOutputInnerLen(const arm::app::Model& model,
+ uint32_t outputCtxLen);
+
+/**
+ * @brief Entry point of the ASR use case: initialises the model and the
+ * pre/post-processing objects, fills the application context and
+ * then dispatches menu options until a handler reports failure.
+ * @param[in,out] platform Reference to the hal platform object; stored in the
+ * application context and used to read user input.
+ */
+void main_loop(hal_platform& platform)
+{
+ arm::app::Wav2LetterModel model; /* Model wrapper object. */
+
+ /* Load the model. */
+ if (!model.Init()) {
+ printf_err("Failed to initialise model\n");
+ return;
+ } else if (!VerifyTensorDimensions(model)) {
+ printf_err("Model's input or output dimension verification failed\n");
+ return;
+ }
+
+ /* Initialise pre-processing. */
+ arm::app::audio::asr::Preprocess prep(
+ GetNumMfccFeatures(model),
+ g_FrameLength,
+ g_FrameStride,
+ GetNumMfccFeatureVectors(model));
+
+ /* Initialise post-processing. */
+ /* NOTE(review): GetOutputContextLen() returns 0 after logging an error; that
+ * value is passed on unchecked here. */
+ const uint32_t outputCtxLen = GetOutputContextLen(model, g_ctxLen);
+ /* NOTE(review): hard-coded; presumably the index of the blank token in the
+ * label set - confirm against the labels file. */
+ const uint32_t blankTokenIdx = 28;
+ arm::app::audio::asr::Postprocess postp(
+ outputCtxLen,
+ GetOutputInnerLen(model, outputCtxLen),
+ blankTokenIdx);
+
+ /* Instantiate application context. */
+ arm::app::ApplicationContext caseContext;
+ std::vector <std::string> labels;
+ GetLabelsVector(labels);
+ arm::app::AsrClassifier classifier; /* Classifier wrapper object. */
+
+ /* The objects registered by reference below are locals of this function. */
+ caseContext.Set<hal_platform&>("platform", platform);
+ caseContext.Set<arm::app::Model&>("model", model);
+ caseContext.Set<uint32_t>("clipIndex", 0);
+ caseContext.Set<uint32_t>("frameLength", g_FrameLength);
+ caseContext.Set<uint32_t>("frameStride", g_FrameStride);
+ caseContext.Set<float>("scoreThreshold", g_ScoreThreshold); /* Score threshold. */
+ caseContext.Set<uint32_t>("ctxLen", g_ctxLen); /* Left and right context length (MFCC feat vectors). */
+ caseContext.Set<const std::vector <std::string>&>("labels", labels);
+ caseContext.Set<arm::app::AsrClassifier&>("classifier", classifier);
+ caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
+ caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
+
+ bool executionSuccessful = true;
+ /* With a single baked-in file no menu is shown and the loop body runs once. */
+ constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
+
+ /* Loop. */
+ do {
+ int menuOption = MENU_OPT_RUN_INF_NEXT;
+ if (bUseMenu) {
+ DisplayMenu();
+ menuOption = arm::app::ReadUserInputAsInt(platform);
+ printf("\n");
+ }
+ switch (menuOption) {
+ case MENU_OPT_RUN_INF_NEXT:
+ executionSuccessful = ClassifyAudioHandler(
+ caseContext,
+ caseContext.Get<uint32_t>("clipIndex"),
+ false);
+ break;
+ case MENU_OPT_RUN_INF_CHOSEN: {
+ printf(" Enter the audio clip index [0, %d]: ",
+ NUMBER_OF_FILES-1);
+ /* The user value is converted to unsigned without a range check here. */
+ auto clipIndex = static_cast<uint32_t>(
+ arm::app::ReadUserInputAsInt(platform));
+ executionSuccessful = ClassifyAudioHandler(caseContext,
+ clipIndex,
+ false);
+ break;
+ }
+ case MENU_OPT_RUN_INF_ALL:
+ executionSuccessful = ClassifyAudioHandler(
+ caseContext,
+ caseContext.Get<uint32_t>("clipIndex"),
+ true);
+ break;
+ case MENU_OPT_SHOW_MODEL_INFO:
+ executionSuccessful = model.ShowModelInfoHandler();
+ break;
+ case MENU_OPT_LIST_AUDIO_CLIPS:
+ executionSuccessful = ListFilesHandler(caseContext);
+ break;
+ default:
+ /* executionSuccessful is left untouched, so the menu is shown again. */
+ printf("Incorrect choice, try again.");
+ break;
+ }
+ } while (executionSuccessful && bUseMenu);
+ info("Main loop terminated.\n");
+}
+
+/**
+ * @brief Verifies that the model's first input and first output tensors
+ *        exist and each have at least three dimensions.
+ * @param[in] model Model whose tensors are inspected.
+ * @return true if both tensors pass the checks, false otherwise (an error
+ *         is logged for the first failing check).
+ */
+static bool VerifyTensorDimensions(const arm::app::Model& model)
+{
+    /* Populate tensor related parameters. */
+    TfLiteTensor* inputTensor = model.GetInputTensor(0);
+    /* Check the tensor pointer itself before looking at its dims. */
+    if (!inputTensor || !inputTensor->dims) {
+        printf_err("Invalid input tensor dims\n");
+        return false;
+    } else if (inputTensor->dims->size < 3) {
+        printf_err("Input tensor dimension should be >= 3\n");
+        return false;
+    }
+
+    TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+    if (!outputTensor || !outputTensor->dims) {
+        printf_err("Invalid output tensor dims\n");
+        return false;
+    } else if (outputTensor->dims->size < 3) {
+        printf_err("Output tensor dimension should be >= 3\n");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * @brief Derives the per-window feature count as one third of the input
+ *        tensor's column count.
+ * @param[in] model Model providing the input tensor.
+ * @return Column count divided by three, clamped at zero. An error is logged
+ *         (but the value is still returned) if the count is not a multiple
+ *         of three.
+ */
+static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
+{
+    /* Presumably MFCC + first + second order deltas share one row - confirm. */
+    constexpr int blocksPerRow = 3;
+
+    const TfLiteTensor* tensor = model.GetInputTensor(0);
+    const int numCols = tensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
+
+    if (numCols % blocksPerRow != 0) {
+        printf_err("Number of input columns is not a multiple of 3\n");
+    }
+
+    const int numFeatures = numCols / blocksPerRow;
+    return (numFeatures > 0) ? numFeatures : 0;
+}
+
+/**
+ * @brief Reads the number of rows of the model's first input tensor.
+ * @param[in] model Model providing the input tensor.
+ * @return Row count, or 0 if the stored dimension is negative.
+ */
+static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
+{
+    const int numRows =
+        model.GetInputTensor(0)->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
+
+    if (numRows < 0) {
+        return 0;
+    }
+    return numRows;
+}
+
+/**
+ * @brief Scales the input context length to output tensor rows.
+ * @param[in] model       Model providing the input and output tensors.
+ * @param[in] inputCtxLen Context length on each side, in input rows.
+ * @return inputCtxLen divided by the input-rows/output-rows ratio, rounded;
+ *         0 (with an error logged) if the input rows cannot hold the context.
+ */
+static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
+{
+    const uint32_t numInputRows = GetNumMfccFeatureVectors(model);
+    const uint32_t ctxBothSides = 2 * inputCtxLen;
+
+    /* Unsigned subtraction: wraps if the context is too large; the check
+     * below rejects that case. */
+    const uint32_t innerLen = numInputRows - ctxBothSides;
+
+    /* Check to make sure that the input tensor supports the above
+     * context and inner lengths. */
+    const bool ctxFits = (numInputRows > ctxBothSides) && (numInputRows > innerLen);
+    if (!ctxFits) {
+        printf_err("Input rows not compatible with ctx of %u\n",
+                   inputCtxLen);
+        return 0;
+    }
+
+    constexpr uint32_t rowsAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+    const TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+    const int rawOutputRows = outputTensor->dims->data[rowsAxis];
+    const uint32_t numOutputRows = (rawOutputRows > 0) ? rawOutputRows : 0;
+
+    /* How many input rows correspond to one output row. */
+    const float inToOutRatio = static_cast<float>(numInputRows) /
+                               static_cast<float>(numOutputRows);
+
+    return std::round(static_cast<float>(inputCtxLen) / inToOutRatio);
+}
+
+/**
+ * @brief Computes the output inner length: output rows minus the context on
+ *        both sides.
+ * @param[in] model        Model providing the output tensor.
+ * @param[in] outputCtxLen Context length on each side, in output rows.
+ * @return Inner length, or 0 (with an error logged) if the two contexts do
+ *         not fit into the output rows.
+ */
+static uint32_t GetOutputInnerLen(const arm::app::Model& model,
+                                  const uint32_t outputCtxLen)
+{
+    constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+    TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+    const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
+
+    /* Unsigned arithmetic would wrap around to a huge value if the context
+     * exceeded the available rows; comparing against half the rows also
+     * avoids overflowing 2 * outputCtxLen. */
+    if (outputCtxLen > outputRows / 2) {
+        printf_err("Output rows not compatible with ctx of %u\n",
+                   outputCtxLen);
+        return 0;
+    }
+
+    return (outputRows - (2 * outputCtxLen));
+}
diff --git a/source/use_case/asr/src/OutputDecode.cc b/source/use_case/asr/src/OutputDecode.cc
new file mode 100644
index 0000000..41fbe07
--- /dev/null
+++ b/source/use_case/asr/src/OutputDecode.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "OutputDecode.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+  * @brief Collapses a sequence of classification results into a string.
+  *        Each run of identical consecutive labels contributes its label
+  *        once; runs whose label is "$" contribute nothing.
+  * @param[in] vecResults Classification results in output order.
+  * @return Concatenation of the surviving labels.
+  */
+ std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults)
+ {
+     std::string decoded;
+     const size_t numResults = vecResults.size();
+
+     for (size_t idx = 0; idx < numResults; ++idx) {
+         const auto& label = vecResults[idx].m_label;
+
+         /* Only the last element of a run of equal labels is emitted. */
+         const bool endOfRun = (idx + 1 == numResults) ||
+                               (label != vecResults[idx + 1].m_label);
+         if (!endOfRun) {
+             continue;
+         }
+
+         /* $ is a character used to represent unknown and double characters
+          * so should not be in output. */
+         if (label != "$") {
+             decoded += label;
+         }
+     }
+
+     return decoded;
+ }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/asr/src/UseCaseHandler.cc b/source/use_case/asr/src/UseCaseHandler.cc
new file mode 100644
index 0000000..e706eb8
--- /dev/null
+++ b/source/use_case/asr/src/UseCaseHandler.cc
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "UseCaseHandler.hpp"
+
+#include "InputFiles.hpp"
+#include "AsrClassifier.hpp"
+#include "Wav2LetterModel.hpp"
+#include "hal.h"
+#include "Wav2LetterMfcc.hpp"
+#include "AudioUtils.hpp"
+#include "UseCaseCommonUtils.hpp"
+#include "AsrResult.hpp"
+#include "Wav2LetterPreprocess.hpp"
+#include "Wav2LetterPostprocess.hpp"
+#include "OutputDecode.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Helper function to increment current audio clip index.
+ * @param[in,out] ctx Pointer to the application context object.
+ **/
+ static void _IncrementAppCtxClipIdx(ApplicationContext& ctx);
+
+ /**
+ * @brief Helper function to set the audio clip index.
+ * @param[in,out] ctx Pointer to the application context object.
+ * @param[in] idx Value to be set.
+ * @return true if index is set, false otherwise.
+ **/
+ static bool _SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx);
+
+ /**
+ * @brief Presents inference results using the data presentation
+ * object.
+ * @param[in] platform Reference to the hal platform object.
+ * @param[in] results Vector of classification results to be displayed.
+ * @param[in] infTimeMs Inference time in milliseconds, if available
+ * otherwise, this can be passed in as 0.
+ * @return true if successful, false otherwise.
+ **/
+ static bool _PresentInferenceResult(
+ hal_platform& platform,
+ const std::vector<arm::app::asr::AsrResult>& results);
+
+ /**
+  * @brief Audio inference classification handler: slides a window over an
+  *        audio clip, runs pre-processing, inference, post-processing and
+  *        classification for each window, then presents the decoded result.
+  * @param[in,out] ctx       Application context; "clipIndex" and "results"
+  *                          are updated by this function.
+  * @param[in]     clipIndex Clip to process. Applied only if it is less than
+  *                          NUMBER_OF_FILES; otherwise the index already
+  *                          stored in the context is used.
+  * @param[in]     runAll    If true, keep processing consecutive clips until
+  *                          the index wraps back to the starting clip.
+  * @return true if every processed clip succeeded, false otherwise.
+  **/
+ bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
+ {
+     constexpr uint32_t dataPsnTxtInfStartX = 20;
+     constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+     auto& platform = ctx.Get<hal_platform&>("platform");
+     platform.data_psn->clear(COLOR_BLACK);
+
+     /* If the request has a valid size, set the audio index. */
+     if (clipIndex < NUMBER_OF_FILES) {
+         if (!_SetAppCtxClipIdx(ctx, clipIndex)) {
+             return false;
+         }
+     }
+
+     /* Get model reference. */
+     auto& model = ctx.Get<Model&>("model");
+     if (!model.IsInited()) {
+         printf_err("Model is not initialised! Terminating processing.\n");
+         return false;
+     }
+
+     /* Get score threshold to be applied for the classifier (post-inference). */
+     auto scoreThreshold = ctx.Get<float>("scoreThreshold");
+
+     /* Get tensors. Dimensions of the tensor should have been verified by
+      * the caller. */
+     TfLiteTensor* inputTensor = model.GetInputTensor(0);
+     TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+     const uint32_t inputRows = inputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
+
+     /* Populate MFCC related parameters. */
+     auto mfccParamsWinLen = ctx.Get<uint32_t>("frameLength");
+     auto mfccParamsWinStride = ctx.Get<uint32_t>("frameStride");
+
+     /* Populate ASR inference context and inner lengths for input. */
+     auto inputCtxLen = ctx.Get<uint32_t>("ctxLen");
+     const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
+
+     /* Audio data stride corresponds to inputInnerLen feature vectors. */
+     const uint32_t audioParamsWinLen = (inputRows - 1) * mfccParamsWinStride + (mfccParamsWinLen);
+     const uint32_t audioParamsWinStride = inputInnerLen * mfccParamsWinStride;
+     const float audioParamsSecondsPerSample = (1.0/audio::Wav2LetterMFCC::ms_defaultSamplingFreq);
+
+     /* Get pre/post-processing objects. */
+     auto& prep = ctx.Get<audio::asr::Preprocess&>("preprocess");
+     auto& postp = ctx.Get<audio::asr::Postprocess&>("postprocess");
+
+     /* Set default reduction axis for post-processing. */
+     const uint32_t reductionAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+
+     /* Audio clip start index. */
+     auto startClipIdx = ctx.Get<uint32_t>("clipIndex");
+
+     /* Loop to process audio clips. */
+     do {
+         /* Get current audio clip index. */
+         auto currentIndex = ctx.Get<uint32_t>("clipIndex");
+
+         /* Get the current audio buffer and respective size. */
+         const int16_t* audioArr = get_audio_array(currentIndex);
+         const uint32_t audioArrSize = get_audio_array_size(currentIndex);
+
+         if (!audioArr) {
+             printf_err("Invalid audio array pointer\n");
+             return false;
+         }
+
+         /* Audio clip must have enough samples to produce 1 MFCC feature. */
+         if (audioArrSize < mfccParamsWinLen) {
+             printf_err("Not enough audio samples, minimum needed is %u\n", mfccParamsWinLen);
+             return false;
+         }
+
+         /* Initialise an audio slider. */
+         auto audioDataSlider = audio::ASRSlidingWindow<const int16_t>(
+                                     audioArr,
+                                     audioArrSize,
+                                     audioParamsWinLen,
+                                     audioParamsWinStride);
+
+         /* Declare a container for results. */
+         std::vector<arm::app::asr::AsrResult> results;
+
+         /* Display message on the LCD - inference running. */
+         std::string str_inf{"Running inference... "};
+         platform.data_psn->present_data_text(
+                             str_inf.c_str(), str_inf.size(),
+                             dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+         info("Running inference on audio clip %u => %s\n", currentIndex,
+              get_filename(currentIndex));
+
+         size_t inferenceWindowLen = audioParamsWinLen;
+
+         /* Start sliding through audio clip. */
+         while (audioDataSlider.HasNext()) {
+
+             /* If not enough audio see how much can be sent for processing. */
+             size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();
+             if (nextStartIndex + audioParamsWinLen > audioArrSize) {
+                 inferenceWindowLen = audioArrSize - nextStartIndex;
+             }
+
+             const int16_t* inferenceWindow = audioDataSlider.Next();
+
+             info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
+                  static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));
+
+             Profiler prepProfiler{&platform, "pre-processing"};
+             prepProfiler.StartProfiling();
+
+             /* Calculate MFCCs, deltas and populate the input tensor.
+              * A failure here would leave the input tensor unusable, so
+              * stop instead of running inference on it. */
+             const bool prepOk = prep.Invoke(inferenceWindow, inferenceWindowLen, inputTensor);
+
+             prepProfiler.StopProfiling();
+             std::string prepProfileResults = prepProfiler.GetResultsAndReset();
+             info("%s\n", prepProfileResults.c_str());
+
+             if (!prepOk) {
+                 printf_err("Pre-processing failed\n");
+                 return false;
+             }
+
+             /* Run inference over this audio clip sliding window. */
+             arm::app::RunInference(platform, model);
+
+             /* Post-process. */
+             if (!postp.Invoke(outputTensor, reductionAxis, !audioDataSlider.HasNext())) {
+                 printf_err("Post-processing failed\n");
+                 return false;
+             }
+
+             /* Get results. */
+             std::vector<ClassificationResult> classificationResult;
+             auto& classifier = ctx.Get<AsrClassifier&>("classifier");
+             if (!classifier.GetClassificationResults(
+                     outputTensor, classificationResult,
+                     ctx.Get<std::vector<std::string>&>("labels"), 1)) {
+                 printf_err("Failed to get classification results\n");
+                 return false;
+             }
+
+             results.emplace_back(asr::AsrResult(classificationResult,
+                                                 (audioDataSlider.Index() *
+                                                  audioParamsSecondsPerSample *
+                                                  audioParamsWinStride),
+                                                 audioDataSlider.Index(), scoreThreshold));
+
+#if VERIFY_TEST_OUTPUT
+             arm::app::DumpTensor(outputTensor,
+                 outputTensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx]);
+#endif /* VERIFY_TEST_OUTPUT */
+
+         }
+
+         /* Erase. */
+         str_inf = std::string(str_inf.size(), ' ');
+         platform.data_psn->present_data_text(
+                             str_inf.c_str(), str_inf.size(),
+                             dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+         ctx.Set<std::vector<arm::app::asr::AsrResult>>("results", results);
+
+         if (!_PresentInferenceResult(platform, results)) {
+             return false;
+         }
+
+         _IncrementAppCtxClipIdx(ctx);
+
+     /* When running all clips, stop once the index is back at the start. */
+     } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);
+
+     return true;
+ }
+
+ /* Advances "clipIndex" in the context by one, wrapping to 0 once the next
+  * value would reach NUMBER_OF_FILES. */
+ static void _IncrementAppCtxClipIdx(ApplicationContext& ctx)
+ {
+     const auto currentIdx = ctx.Get<uint32_t>("clipIndex");
+     const bool wraps = (currentIdx + 1 >= NUMBER_OF_FILES);
+
+     if (wraps) {
+         ctx.Set<uint32_t>("clipIndex", 0);
+     } else {
+         ctx.Set<uint32_t>("clipIndex", currentIdx + 1);
+     }
+ }
+
+ /* Stores idx as "clipIndex" in the context when it is below
+  * NUMBER_OF_FILES; otherwise logs an error and leaves the context as is. */
+ static bool _SetAppCtxClipIdx(ApplicationContext& ctx, const uint32_t idx)
+ {
+     if (idx < NUMBER_OF_FILES) {
+         ctx.Set<uint32_t>("clipIndex", idx);
+         return true;
+     }
+
+     printf_err("Invalid idx %u (expected less than %u)\n",
+                idx, NUMBER_OF_FILES);
+     return false;
+ }
+
+ /* Logs the decoded text of every inference, then decodes all inferences
+  * together and shows that final string on the display and in the log.
+  * Always returns true. */
+ static bool _PresentInferenceResult(hal_platform& platform,
+                                     const std::vector<arm::app::asr::AsrResult>& results)
+ {
+     constexpr uint32_t textStartX = 20;
+     constexpr uint32_t textStartY = 60;
+     constexpr bool multiLine = true;
+
+     platform.data_psn->set_text_color(COLOR_GREEN);
+
+     /* Results from multiple inferences are concatenated so the decoder
+      * can be run over the whole clip at the end. */
+     std::vector<arm::app::ClassificationResult> allResults;
+
+     for (const auto& infResult : results) {
+         allResults.insert(allResults.end(),
+                           infResult.m_resultVec.begin(),
+                           infResult.m_resultVec.end());
+
+         /* Per-inference decoded string, for the log only. */
+         const std::string infText = audio::asr::DecodeOutput(infResult.m_resultVec);
+         info("Result for inf %u: %s\n", infResult.m_inferenceNumber,
+              infText.c_str());
+     }
+
+     /* Decode the combined result. */
+     const std::string finalText = audio::asr::DecodeOutput(allResults);
+
+     platform.data_psn->present_data_text(
+                         finalText.c_str(), finalText.size(),
+                         textStartX, textStartY,
+                         multiLine);
+
+     info("Final result: %s\n", finalText.c_str());
+     return true;
+ }
+
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/asr/src/Wav2LetterMfcc.cc b/source/use_case/asr/src/Wav2LetterMfcc.cc
new file mode 100644
index 0000000..92c91bc
--- /dev/null
+++ b/source/use_case/asr/src/Wav2LetterMfcc.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterMfcc.hpp"
+
+#include "PlatformMath.hpp"
+
+#include <cfloat>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /**
+  * @brief Computes one energy value per filter bank by accumulating the
+  *        products of filter weights and FFT values over each bank's
+  *        [first, last] index range.
+  * @param[in]  fftVec                Values indexed by the first/last ranges.
+  * @param[in]  melFilterBank         Per-bank filter weights, consumed from
+  *                                   the start of each row.
+  * @param[in]  filterBankFilterFirst First fftVec index for each bank.
+  * @param[in]  filterBankFilterLast  Last (inclusive) fftVec index per bank.
+  * @param[out] melEnergies           Receives one energy per bank; its size
+  *                                   defines the number of banks.
+  * @return false if the bank containers are inconsistent in length,
+  *         true otherwise.
+  */
+ bool Wav2LetterMFCC::ApplyMelFilterBank(
+         std::vector<float>& fftVec,
+         std::vector<std::vector<float>>& melFilterBank,
+         std::vector<int32_t>& filterBankFilterFirst,
+         std::vector<int32_t>& filterBankFilterLast,
+         std::vector<float>& melEnergies)
+ {
+     const size_t numBanks = melEnergies.size();
+
+     /* melFilterBank is indexed by bank below, so it must hold at least
+      * numBanks rows as well. */
+     if (numBanks != filterBankFilterFirst.size() ||
+         numBanks != filterBankFilterLast.size() ||
+         numBanks > melFilterBank.size()) {
+         printf_err("Unexpected filter bank lengths\n");
+         return false;
+     }
+
+     for (size_t bin = 0; bin < numBanks; ++bin) {
+         auto filterBankIter = melFilterBank[bin].begin();
+         float melEnergy = 1e-10; /* Avoid log of zero at later stages, same value used in librosa. */
+         const int32_t firstIndex = filterBankFilterFirst[bin];
+         const int32_t lastIndex = filterBankFilterLast[bin];
+
+         for (int32_t i = firstIndex; i <= lastIndex; ++i) {
+             melEnergy += (*filterBankIter++ * fftVec[i]);
+         }
+
+         melEnergies[bin] = melEnergy;
+     }
+
+     return true;
+ }
+
+ /**
+  * @brief Converts mel energies, in place, to 10*log10 values and clamps
+  *        them to at most 80 below the largest converted value.
+  * @param[in,out] melEnergies Energies to convert.
+  */
+ void Wav2LetterMFCC::ConvertToLogarithmicScale(
+         std::vector<float>& melEnergies)
+ {
+     /* Range kept below the peak when clamping. */
+     constexpr float maxDb = 80.0;
+
+     /* Because we are taking natural logs, we need to multiply by log10(e).
+      * Also, for wav2letter model, we scale our log10 values by 10. */
+     constexpr float multiplier = 10.0 *              /* Default scalar. */
+                                  0.4342944819032518; /* log10f(std::exp(1.0)) */
+
+     /* Natural logarithms of the whole vector. */
+     std::vector<float> naturalLogs(melEnergies.size(), 0.f);
+     math::MathUtils::VecLogarithmF32(melEnergies, naturalLogs);
+
+     /* Scale the log values and track the peak. */
+     float peak = -FLT_MAX;
+     for (size_t i = 0; i < melEnergies.size(); ++i) {
+         melEnergies[i] = naturalLogs[i] * multiplier;
+         if (melEnergies[i] > peak) {
+             peak = melEnergies[i];
+         }
+     }
+
+     /* Clamp everything to the floor derived from the peak. */
+     const float floorLevel = peak - maxDb;
+     for (float& energy : melEnergies) {
+         energy = std::max(energy, floorLevel);
+     }
+ }
+
+ /**
+  * @brief Builds a flattened DCT matrix with coefficientCount rows of
+  *        inputLength values each (row after row in one vector).
+  * @param[in] inputLength      Number of values per row.
+  * @param[in] coefficientCount Number of rows.
+  * @return The matrix as a single vector of inputLength * coefficientCount
+  *         floats.
+  */
+ std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
+         const int32_t inputLength,
+         const int32_t coefficientCount)
+ {
+     std::vector<float> dctMatrix(inputLength * coefficientCount);
+
+     /* Orthonormal normalization. */
+     const float firstRowScale = 2 * math::MathUtils::SqrtF32(1.0f /
+                                     static_cast<float>(4*inputLength));
+     const float otherRowScale = 2 * math::MathUtils::SqrtF32(1.0f /
+                                     static_cast<float>(2*inputLength));
+
+     const float angleStep = M_PI / inputLength;
+
+     /* Row 0: cos(0) = 1, so every entry is just the K0 normalizer. */
+     float* row = dctMatrix.data();
+     for (int32_t col = 0; col < inputLength; ++col) {
+         row[col] = firstRowScale;
+     }
+
+     /* Rows 1 onwards use the standard normalizer; the angle is accumulated
+      * by one step per row, starting at one step for row 1. */
+     float rowAngle = angleStep;
+     for (int32_t k = 1; k < coefficientCount; ++k) {
+         row += inputLength;
+         for (int32_t col = 0; col < inputLength; ++col) {
+             row[col] = otherRowScale *
+                 math::MathUtils::CosineF32((col + 0.5f) * rowAngle);
+         }
+         rowAngle += angleStep;
+     }
+
+     return dctMatrix;
+ }
+
+ /**
+  * @brief Returns the weight normaliser for one mel filter: 2 divided by
+  *        the difference of the inverse-mel-scaled right and left edges.
+  * @param[in] leftMel      Left edge of the filter, mel scale.
+  * @param[in] rightMel     Right edge of the filter, mel scale.
+  * @param[in] useHTKMethod Forwarded to MFCC::InverseMelScale.
+  * @return The normaliser value.
+  */
+ float Wav2LetterMFCC::GetMelFilterBankNormaliser(
+         const float& leftMel,
+         const float& rightMel,
+         const bool useHTKMethod)
+ {
+     const auto upperEdge = MFCC::InverseMelScale(rightMel, useHTKMethod);
+     const auto lowerEdge = MFCC::InverseMelScale(leftMel, useHTKMethod);
+
+     /* Slaney normalization for mel weights. */
+     return (2.0f / (upperEdge - lowerEdge));
+ }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/asr/src/Wav2LetterModel.cc b/source/use_case/asr/src/Wav2LetterModel.cc
new file mode 100644
index 0000000..5aefecd
--- /dev/null
+++ b/source/use_case/asr/src/Wav2LetterModel.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterModel.hpp"
+
+#include "hal.h"
+
+/* Returns a reference to this model's op resolver member. */
+const tflite::MicroOpResolver& arm::app::Wav2LetterModel::GetOpResolver()
+{
+ return this->_m_opResolver;
+}
+
+/* Registers the operators this model uses (Conv2D, Mul, Maximum, Reshape)
+ * with the op resolver and, when ARM_NPU is defined, the Ethos-U operator.
+ * Returns false only if adding the Ethos-U operator fails. */
+bool arm::app::Wav2LetterModel::EnlistOperations()
+{
+    auto& resolver = this->_m_opResolver;
+
+    resolver.AddConv2D();
+    resolver.AddMul();
+    resolver.AddMaximum();
+    resolver.AddReshape();
+
+#if defined(ARM_NPU)
+    if (kTfLiteOk != resolver.AddEthosU()) {
+        printf_err("Failed to add Arm NPU support to op resolver.");
+        return false;
+    }
+    info("Added %s support to op resolver\n",
+         tflite::GetString_ETHOSU());
+#endif /* ARM_NPU */
+
+    return true;
+}
+
+/* Defined outside this file; provides the address of the model data. */
+extern uint8_t* GetModelPointer();
+/* Returns the pointer obtained from GetModelPointer(). */
+const uint8_t* arm::app::Wav2LetterModel::ModelPointer()
+{
+ return GetModelPointer();
+}
+
+/* Defined outside this file; provides the length of the model data. */
+extern size_t GetModelLen();
+/* Returns the value obtained from GetModelLen(). */
+size_t arm::app::Wav2LetterModel::ModelSize()
+{
+ return GetModelLen();
+} \ No newline at end of file
diff --git a/source/use_case/asr/src/Wav2LetterPostprocess.cc b/source/use_case/asr/src/Wav2LetterPostprocess.cc
new file mode 100644
index 0000000..60ee51e
--- /dev/null
+++ b/source/use_case/asr/src/Wav2LetterPostprocess.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPostprocess.hpp"
+
+#include "Wav2LetterModel.hpp"
+
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+ * @brief Stores the context/inner lengths and blank token index, derives
+ * the total length as 2 * contextLen + innerLen and resets the
+ * iteration counter to 0.
+ * @param[in] contextLen Context length on each side.
+ * @param[in] innerLen Inner length between the two contexts.
+ * @param[in] blankTokenIdx Index written with 1 when a context row is erased.
+ */
+ Postprocess::Postprocess(const uint32_t contextLen,
+ const uint32_t innerLen,
+ const uint32_t blankTokenIdx)
+ : _m_contextLen(contextLen),
+ _m_innerLen(innerLen),
+ /* NOTE(review): reads the members, not the parameters; this is only correct
+ * if _m_contextLen and _m_innerLen are declared before _m_totalLen in the
+ * class - confirm in the header. */
+ _m_totalLen(2 * this->_m_contextLen + this->_m_innerLen),
+ _m_countIterations(0),
+ _m_blankTokenIdx(blankTokenIdx)
+ {}
+
+ /**
+  * @brief Validates the tensor and erases its context sections along the
+  *        requested axis.
+  * @param[in,out] tensor        Output tensor to post-process in place.
+  * @param[in]     axisIdx       Axis to process; only the output rows and
+  *                              output columns indices are recognised.
+  * @param[in]     lastIteration Forwarded to the erase routine.
+  * @return Result of the erase routine, or false on any failed check.
+  */
+ bool Postprocess::Invoke(TfLiteTensor* tensor,
+                          const uint32_t axisIdx,
+                          const bool lastIteration)
+ {
+     constexpr auto rowsAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+     constexpr auto colsAxis = arm::app::Wav2LetterModel::ms_outputColsIdx;
+
+     /* Basic checks. */
+     if (!this->_IsInputValid(tensor, axisIdx)) {
+         return false;
+     }
+
+     /* Irrespective of tensor type, we use unsigned "byte" */
+     uint8_t* rawBytes = tflite::GetTensorData<uint8_t>(tensor);
+     const uint32_t bytesPerElem = this->_GetTensorElementSize(tensor);
+
+     /* Other sanity checks. */
+     if (0 == bytesPerElem) {
+         printf_err("Tensor type not supported for post processing\n");
+         return false;
+     }
+     if (bytesPerElem * this->_m_totalLen > tensor->bytes) {
+         printf_err("Insufficient number of tensor bytes\n");
+         return false;
+     }
+
+     /* Which axis do we need to process? The stride is the byte size of
+      * one slice along the other axis. */
+     if (axisIdx == rowsAxis) {
+         const uint32_t strideBytes = bytesPerElem * tensor->dims->data[colsAxis];
+         return this->_EraseSectionsRowWise(rawBytes, strideBytes, lastIteration);
+     }
+     if (axisIdx == colsAxis) {
+         const uint32_t strideBytes = bytesPerElem * tensor->dims->data[rowsAxis];
+         return this->_EraseSectionsColWise(rawBytes, strideBytes, lastIteration);
+     }
+
+     printf_err("Unsupported axis index: %u\n", axisIdx);
+     return false;
+ }
+
+ /**
+  * @brief Checks that the tensor exists, that axisIdx is within its
+  *        dimensions and that the size along that axis equals the total
+  *        length this object was configured with.
+  * @param[in] tensor  Tensor to validate.
+  * @param[in] axisIdx Axis whose size is compared with the total length.
+  * @return true if all checks pass, false otherwise.
+  */
+ bool Postprocess::_IsInputValid(TfLiteTensor* tensor,
+                                 const uint32_t axisIdx) const
+ {
+     if (nullptr == tensor) {
+         return false;
+     }
+
+     /* dims is dereferenced by every check below. */
+     if (nullptr == tensor->dims) {
+         printf_err("Invalid tensor dims\n");
+         return false;
+     }
+
+     if (static_cast<int>(axisIdx) >= tensor->dims->size) {
+         printf_err("Invalid axis index: %u; Max: %d\n",
+                    axisIdx, tensor->dims->size);
+         return false;
+     }
+
+     if (static_cast<int>(this->_m_totalLen) !=
+         tensor->dims->data[axisIdx]) {
+         /* Report the axis itself plus both sizes; previously the size was
+          * printed where the message claimed to show the axis. */
+         printf_err("Unexpected tensor dimension for axis %u, expected %d, got %d\n",
+                    axisIdx,
+                    static_cast<int>(this->_m_totalLen),
+                    tensor->dims->data[axisIdx]);
+         return false;
+     }
+
+     return true;
+ }
+
+ /**
+  * @brief Maps the tensor's element type to its size in bytes.
+  * @param[in] tensor Tensor whose type is inspected.
+  * @return 1, 2 or 4 for the supported types; 0 (with an error logged) for
+  *         any other type.
+  */
+ uint32_t Postprocess::_GetTensorElementSize(TfLiteTensor* tensor)
+ {
+     switch (tensor->type) {
+         /* Single byte types. */
+         case kTfLiteUInt8:
+         case kTfLiteInt8:
+             return 1;
+
+         case kTfLiteInt16:
+             return 2;
+
+         /* Four byte types. */
+         case kTfLiteInt32:
+         case kTfLiteFloat32:
+             return 4;
+
+         default:
+             break;
+     }
+
+     printf_err("Unsupported tensor type %s\n",
+                TfLiteTypeGetName(tensor->type));
+     return 0;
+ }
+
+ /**
+  * @brief Overwrites the left and/or right context rows of the output with
+  *        a "blank token" pattern: all bytes zero, then the byte at the
+  *        blank token index of each row set to 1.
+  *        The left context is erased on every iteration except the first
+  *        of a sequence; the right context on every iteration except the
+  *        last. The iteration counter is reset after the last iteration.
+  * @param[in,out] ptrData       Start of the tensor data, as bytes.
+  * @param[in]     strideSzBytes Size of one row in bytes.
+  * @param[in]     lastIteration Whether this is the final window.
+  * @return Always true.
+  */
+ bool Postprocess::_EraseSectionsRowWise(
+         uint8_t* ptrData,
+         const uint32_t strideSzBytes,
+         const bool lastIteration)
+ {
+     /* In this case, the "zero-ing" is quite simple as the region
+      * to be zeroed sits in contiguous memory (row-major). */
+     const uint32_t ctxBytes = strideSzBytes * this->_m_contextLen;
+
+     /* Set output of each classification window to the blank token.
+      * NOTE(review): the blank token offset is a byte offset that is not
+      * scaled by element size - looks correct for 1-byte types only; confirm. */
+     auto blankOutSection = [&](uint8_t* section) {
+         std::memset(section, 0, ctxBytes);
+         for (size_t row = 0; row < this->_m_contextLen; row++) {
+             section[row*strideSzBytes + this->_m_blankTokenIdx] = 1;
+         }
+     };
+
+     /* Erase left context? */
+     if (this->_m_countIterations > 0) {
+         blankOutSection(ptrData);
+     }
+
+     /* Erase right context? */
+     if (!lastIteration) {
+         blankOutSection(ptrData +
+             (strideSzBytes * (this->_m_contextLen + this->_m_innerLen)));
+     }
+
+     /* Track position within the sequence of windows. */
+     if (lastIteration) {
+         this->_m_countIterations = 0;
+     } else {
+         ++this->_m_countIterations;
+     }
+
+     return true;
+ }
+
+ /* Column-wise counterpart of _EraseSectionsRowWise. It is a stub: all
+ * arguments are ignored and false is always returned, so Invoke() fails
+ * whenever the column axis is requested. */
+ bool Postprocess::_EraseSectionsColWise(
+ uint8_t* ptrData,
+ const uint32_t strideSzBytes,
+ const bool lastIteration)
+ {
+ /* Not implemented. */
+ UNUSED(ptrData);
+ UNUSED(strideSzBytes);
+ UNUSED(lastIteration);
+ return false;
+ }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/asr/src/Wav2LetterPreprocess.cc b/source/use_case/asr/src/Wav2LetterPreprocess.cc
new file mode 100644
index 0000000..e46cca3
--- /dev/null
+++ b/source/use_case/asr/src/Wav2LetterPreprocess.cc
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPreprocess.hpp"
+
+#include "PlatformMath.hpp"
+#include "TensorFlowLiteMicro.hpp"
+
+#include <algorithm>
+#include <cmath>
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+    /**
+     * Builds the pre-processing pipeline state.
+     *
+     * The MFCC, delta1 and delta2 buffers are all created with dimensions
+     * (numMfccFeatures, numMfccVectors). The MFCC engine is only initialised
+     * when both the feature count and the window length are non-zero.
+     *
+     * @param[in] numMfccFeatures   Number of MFCC features per vector.
+     * @param[in] windowLen         Length of one audio window.
+     * @param[in] windowStride      Stride between consecutive audio windows.
+     * @param[in] numMfccVectors    Number of MFCC vectors held in the buffers.
+     */
+    Preprocess::Preprocess(
+            const uint32_t  numMfccFeatures,
+            const uint32_t  windowLen,
+            const uint32_t  windowStride,
+            const uint32_t  numMfccVectors):
+        _m_mfcc(numMfccFeatures, windowLen),
+        _m_mfccBuf(numMfccFeatures, numMfccVectors),
+        _m_delta1Buf(numMfccFeatures, numMfccVectors),
+        _m_delta2Buf(numMfccFeatures, numMfccVectors),
+        _m_windowLen(windowLen),
+        _m_windowStride(windowStride),
+        _m_numMfccFeats(numMfccFeatures),
+        _m_numFeatVectors(numMfccVectors),
+        _m_window()
+    {
+        /* Skip MFCC initialisation for a degenerate configuration. */
+        const bool degenerateConfig = (0 == numMfccFeatures) || (0 == windowLen);
+        if (degenerateConfig) {
+            return;
+        }
+
+        this->_m_mfcc.Init();
+    }
+
+    /**
+     * Runs the full pre-processing pipeline on a block of audio and writes
+     * the quantised result into the given tensor.
+     *
+     * Steps: slide a window over the audio and compute one MFCC vector per
+     * window; pad any remaining vectors with the MFCC of an all-zero window;
+     * compute first and second order deltas; normalise; quantise.
+     *
+     * @param[in]  audioData      Pointer to the audio samples.
+     * @param[in]  audioDataLen   Number of audio samples.
+     * @param[out] tensor         Tensor to populate (uint8 or int8 only).
+     * @return  true on success; false for a null argument, a zero
+     *          quantisation scale, an unsupported tensor type, or a
+     *          quantisation failure.
+     */
+    bool Preprocess::Invoke(
+            const int16_t*  audioData,
+            const uint32_t  audioDataLen,
+            TfLiteTensor*   tensor)
+    {
+        if (nullptr == audioData || nullptr == tensor) {
+            printf_err("Invalid audio data or tensor pointer\n");
+            return false;
+        }
+
+        this->_m_window = SlidingWindow<const int16_t>(
+            audioData, audioDataLen,
+            this->_m_windowLen, this->_m_windowStride);
+
+        uint32_t mfccBufIdx = 0;
+
+        std::fill(_m_mfccBuf.begin(), _m_mfccBuf.end(), 0.f);
+        std::fill(_m_delta1Buf.begin(), _m_delta1Buf.end(), 0.f);
+        std::fill(_m_delta2Buf.begin(), _m_delta2Buf.end(), 0.f);
+
+        /* While we can slide over the window. The index bound stops audio
+         * that yields more windows than the buffer holds from writing past
+         * the last feature vector. */
+        while (mfccBufIdx < this->_m_numFeatVectors && this->_m_window.HasNext()) {
+            const int16_t* mfccWindow = this->_m_window.Next();
+            auto mfccAudioData = std::vector<int16_t>(
+                                    mfccWindow,
+                                    mfccWindow + this->_m_windowLen);
+            auto mfcc = this->_m_mfcc.MfccCompute(mfccAudioData);
+            for (size_t i = 0; i < this->_m_mfccBuf.size(0); ++i) {
+                this->_m_mfccBuf(i, mfccBufIdx) = mfcc[i];
+            }
+            ++mfccBufIdx;
+        }
+
+        /* Pad MFCC if needed by adding MFCC for zeros. */
+        if (mfccBufIdx < this->_m_numFeatVectors) {
+            std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->_m_windowLen, 0);
+            std::vector<float> mfccZeros = this->_m_mfcc.MfccCompute(zerosWindow);
+
+            /* Write element by element with the same (feature, vector)
+             * indexing as the loop above. A raw memory copy would only be
+             * right if the features of one vector were contiguous, which
+             * that indexing does not guarantee. */
+            for (; mfccBufIdx < this->_m_numFeatVectors; ++mfccBufIdx) {
+                for (size_t i = 0; i < this->_m_mfccBuf.size(0); ++i) {
+                    this->_m_mfccBuf(i, mfccBufIdx) = mfccZeros[i];
+                }
+            }
+        }
+
+        /* Compute first and second order deltas from MFCCs. */
+        this->_ComputeDeltas(this->_m_mfccBuf,
+                             this->_m_delta1Buf,
+                             this->_m_delta2Buf);
+
+        /* Normalise. */
+        this->_Normalise();
+
+        /* Quantise. */
+        QuantParams quantParams = GetTensorQuantParams(tensor);
+
+        if (0 == quantParams.scale) {
+            printf_err("Quantisation scale can't be 0\n");
+            return false;
+        }
+
+        switch(tensor->type) {
+            case kTfLiteUInt8:
+                return this->_Quantise<uint8_t>(
+                        tflite::GetTensorData<uint8_t>(tensor), tensor->bytes,
+                        quantParams.scale, quantParams.offset);
+            case kTfLiteInt8:
+                return this->_Quantise<int8_t>(
+                        tflite::GetTensorData<int8_t>(tensor), tensor->bytes,
+                        quantParams.scale, quantParams.offset);
+            default:
+                printf_err("Unsupported tensor type %s\n",
+                    TfLiteTypeGetName(tensor->type));
+        }
+
+        return false;
+    }
+
+    /**
+     * Computes first and second order deltas of the MFCC features by 1D
+     * convolution along the time axis ("valid" padding).
+     *
+     * Only the positions where the whole kernel fits are written; the
+     * remaining positions of `delta1`/`delta2` are left untouched.
+     *
+     * @param[in]  mfcc     MFCC buffer, indexed as (feature, vector).
+     * @param[out] delta1   First order deltas, same indexing.
+     * @param[out] delta2   Second order deltas, same indexing.
+     * @return  false if any buffer is empty or a delta buffer is smaller
+     *          than the MFCC buffer; true otherwise.
+     */
+    bool Preprocess::_ComputeDeltas(Array2d<float>& mfcc,
+                                    Array2d<float>& delta1,
+                                    Array2d<float>& delta2)
+    {
+        /* Static tables: avoids a heap allocation on every call. */
+        static constexpr float delta1Coeffs[] =
+            {6.66666667e-02,  5.00000000e-02,  3.33333333e-02,
+             1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
+            -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
+
+        static constexpr float delta2Coeffs[] =
+            {0.06060606,      0.01515152,     -0.01731602,
+            -0.03679654,     -0.04329004,     -0.03679654,
+            -0.01731602,      0.01515152,      0.06060606};
+
+        constexpr size_t coeffLen = sizeof(delta1Coeffs) / sizeof(delta1Coeffs[0]);
+        static_assert(sizeof(delta2Coeffs) == sizeof(delta1Coeffs),
+                      "Both kernels must have the same length");
+        static_assert(coeffLen % 2 == 1, "Kernel length must be odd");
+
+        if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
+            mfcc.size(0) == 0 || mfcc.size(1) == 0) {
+            return false;
+        }
+
+        const size_t numFeatures = mfcc.size(0);
+        const size_t numFeatVectors = mfcc.size(1);
+
+        /* Both outputs are indexed with the MFCC extents below. */
+        if (delta1.size(0) < numFeatures || delta1.size(1) < numFeatVectors ||
+            delta2.size(1) < numFeatVectors) {
+            return false;
+        }
+
+        /* Get the middle index; coeff vec len is always odd (asserted above). */
+        constexpr size_t fMidIdx = (coeffLen - 1)/2;
+
+        /* Iterate through features in MFCC vector. */
+        for (size_t i = 0; i < numFeatures; ++i) {
+            /* For each feature, iterate through time (t) samples representing feature evolution and
+             * calculate d/dt and d^2/dt^2, using 1D convolution with differential kernels.
+             * Convolution padding = valid, result size is `time length - kernel length + 1`.
+             *
+             * For the small filter, conv1D implementation as a simple loop is efficient enough.
+             * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
+             *
+             * The bound is written as `j + fMidIdx < numFeatVectors` so that it
+             * cannot wrap around when there are fewer vectors than `fMidIdx`.
+             */
+            for (size_t j = fMidIdx; j + fMidIdx < numFeatVectors; ++j) {
+                float d1 = 0;
+                float d2 = 0;
+                const size_t mfccStIdx = j - fMidIdx;
+
+                /* Kernel is applied flipped: k walks forward, m backward. */
+                for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
+                    d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
+                    d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
+                }
+
+                delta1(i,j) = d1;
+                delta2(i,j) = d2;
+            }
+        }
+
+        return true;
+    }
+
+    /** Returns the mean over all elements of `vec`, computed by MathUtils::MeanF32. */
+    float Preprocess::_GetMean(Array2d<float>& vec)
+    {
+        const auto numElements = vec.totalSize();
+        return math::MathUtils::MeanF32(vec.begin(), numElements);
+    }
+
+    /** Returns the standard deviation over all elements of `vec` for the given
+     *  mean, computed by MathUtils::StdDevF32. */
+    float Preprocess::_GetStdDev(Array2d<float>& vec, const float mean)
+    {
+        const auto numElements = vec.totalSize();
+        return math::MathUtils::StdDevF32(vec.begin(), numElements, mean);
+    }
+
+    /**
+     * Normalises `vec` in place to zero mean and unit standard deviation.
+     * When the standard deviation is zero, every element is set to zero.
+     */
+    void Preprocess::_NormaliseVec(Array2d<float>& vec)
+    {
+        const auto mean = Preprocess::_GetMean(vec);
+        const auto stddev = Preprocess::_GetStdDev(vec, mean);
+
+        debug("Mean: %f, Stddev: %f\n", mean, stddev);
+
+        /* Avoid dividing by zero. */
+        if (stddev == 0) {
+            std::fill(vec.begin(), vec.end(), 0);
+            return;
+        }
+
+        /* (value - mean) / stddev, evaluated as value * (1/stddev) - mean/stddev. */
+        const float scale = 1.f/stddev;
+        const float shift = mean/stddev;
+
+        for (auto it = vec.begin(); it != vec.end(); ++it) {
+            *it = *it * scale - shift;
+        }
+    }
+
+    /** Normalises the MFCC, delta1 and delta2 buffers, in that order, each
+     *  independently of the others. */
+    void Preprocess::_Normalise()
+    {
+        Array2d<float>* const buffers[] = {
+            &this->_m_mfccBuf,
+            &this->_m_delta1Buf,
+            &this->_m_delta2Buf
+        };
+
+        for (Array2d<float>* buffer : buffers) {
+            Preprocess::_NormaliseVec(*buffer);
+        }
+    }
+
+    /**
+     * Quantises a single element: rounds `elem/quantScale + quantOffset` to
+     * the nearest integer value and clamps it to [minVal, maxVal].
+     *
+     * @param[in] elem          Value to quantise.
+     * @param[in] quantScale    Quantisation scale.
+     * @param[in] quantOffset   Quantisation offset.
+     * @param[in] minVal        Lower clamp bound.
+     * @param[in] maxVal        Upper clamp bound.
+     * @return  The quantised value, as a float.
+     */
+    float Preprocess::_GetQuantElem(
+            const float     elem,
+            const float     quantScale,
+            const int       quantOffset,
+            const float     minVal,
+            const float     maxVal)
+    {
+        float quantised = std::round((elem/quantScale) + quantOffset);
+
+        /* Clamp: lower bound first, then upper bound. */
+        if (quantised < minVal) {
+            quantised = minVal;
+        }
+        if (maxVal < quantised) {
+            quantised = maxVal;
+        }
+
+        return quantised;
+    }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/asr/usecase.cmake b/source/use_case/asr/usecase.cmake
new file mode 100644
index 0000000..e4b8752
--- /dev/null
+++ b/source/use_case/asr/usecase.cmake
@@ -0,0 +1,164 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+
+# If the path to a directory or source file has been defined,
+# get the type here (FILEPATH or PATH):
+if (DEFINED ${use_case}_FILE_PATH)
+    get_path_type("${${use_case}_FILE_PATH}" PATH_TYPE)
+
+    # Abort if the path is neither a directory nor a file path.
+    # Operands are quoted so that an empty PATH_TYPE still forms a valid
+    # comparison, and so that neither side is treated as a variable name.
+    if (NOT "${PATH_TYPE}" STREQUAL "PATH" AND NOT "${PATH_TYPE}" STREQUAL "FILEPATH")
+        message(FATAL_ERROR "Invalid ${use_case}_FILE_PATH. It should be a dir or file path.")
+    endif()
+else()
+    # Default is a directory path
+    set(PATH_TYPE PATH)
+endif()
+
+message(STATUS "${use_case}_FILE_PATH is of type: ${PATH_TYPE}")
+
+# User-configurable options for this use case. Each USER_OPTION call passes:
+# option name, description, default value, and a type keyword.
+# NOTE(review): USER_OPTION is defined outside this file; presumably it
+# creates a cache entry of the given type - confirm against its definition.
+
+# Input audio: a directory of WAV files or a single WAV file. The type
+# (PATH or FILEPATH) is the PATH_TYPE determined above.
+USER_OPTION(${use_case}_FILE_PATH "Directory with custom WAV input files, or path to a single WAV file, to use in the evaluation application."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
+ ${PATH_TYPE})
+
+USER_OPTION(${use_case}_LABELS_TXT_FILE "Labels' txt file for the chosen model."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/labels_wav2letter.txt
+ FILEPATH)
+
+# Audio conversion parameters; all of them are forwarded to
+# generate_audio_code() further down.
+USER_OPTION(${use_case}_AUDIO_RATE "Specify the target sampling rate. Default is 16000."
+ 16000
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MONO "Specify if the audio needs to be converted to mono. Default is ON."
+ ON
+ BOOL)
+
+USER_OPTION(${use_case}_AUDIO_OFFSET "Specify the offset to start reading after this time (in seconds). Default is 0."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_DURATION "Specify the audio duration to load (in seconds). If set to 0 the entire audio will be processed."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_RES_TYPE "Specify re-sampling algorithm to use. By default is 'kaiser_best'."
+ kaiser_best
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MIN_SAMPLES "Specify the minimum number of samples to use. By default is 16000, if the audio is shorter will be automatically padded."
+ 16000
+ STRING)
+
+# Score threshold; emitted into the generated model code as g_ScoreThreshold.
+USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD "Specify the score threshold [0.0, 1.0) that must be applied to the inference results for a label to be deemed valid."
+ 0.5
+ STRING)
+
+# Generate input files.
+# Positional arguments: input path, generated source dir, generated header
+# dir, then the audio options in this order: rate, mono, offset, duration,
+# re-sampling type, minimum samples.
+generate_audio_code(${${use_case}_FILE_PATH} ${SRC_GEN_DIR} ${INC_GEN_DIR}
+ ${${use_case}_AUDIO_RATE}
+ ${${use_case}_AUDIO_MONO}
+ ${${use_case}_AUDIO_OFFSET}
+ ${${use_case}_AUDIO_DURATION}
+ ${${use_case}_AUDIO_RES_TYPE}
+ ${${use_case}_AUDIO_MIN_SAMPLES})
+
+# Generate labels file from the labels txt file; "Labels" is passed as the
+# output file name for the generated source/header.
+set(${use_case}_LABELS_CPP_FILE Labels)
+generate_labels_code(
+ INPUT "${${use_case}_LABELS_TXT_FILE}"
+ DESTINATION_SRC ${SRC_GEN_DIR}
+ DESTINATION_HDR ${INC_GEN_DIR}
+ OUTPUT_FILENAME "${${use_case}_LABELS_CPP_FILE}"
+)
+
+
+# Activation buffer size for the model; the default 0x00200000 is 2 MiB.
+USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
+ 0x00200000
+ STRING)
+
+
+# If there is no tflite file pointed to, download the default model (and,
+# for the native platform, its test vectors). Otherwise no default applies.
+if (NOT DEFINED ${use_case}_MODEL_TFLITE_PATH)
+
+    set(MODEL_FILENAME          wav2letter_int8.tflite)
+    set(MODEL_RESOURCES_DIR     ${DOWNLOAD_DEP_DIR}/${use_case})
+    file(MAKE_DIRECTORY         ${MODEL_RESOURCES_DIR})
+    set(DEFAULT_MODEL_PATH      ${MODEL_RESOURCES_DIR}/${MODEL_FILENAME})
+
+    # Download the default model
+    set(ZOO_COMMON_SUBPATH      "models/speech_recognition/wav2letter/tflite_int8")
+    set(ZOO_MODEL_SUBPATH       "${ZOO_COMMON_SUBPATH}/${MODEL_FILENAME}")
+
+    download_file_from_modelzoo(${ZOO_MODEL_SUBPATH} ${DEFAULT_MODEL_PATH})
+
+    if (ETHOS_U55_ENABLED)
+        message(STATUS
+            "Ethos-U55 is enabled, but the model downloaded is not optimized by vela. "
+            "To use Ethos-U55 acceleration, optimise the downloaded model and pass it "
+            "as ${use_case}_MODEL_TFLITE_PATH to the CMake configuration.")
+    endif()
+
+    # If the target platform is native.
+    # Both operands are quoted so that an unset TARGET_PLATFORM still forms
+    # a valid comparison.
+    if ("${TARGET_PLATFORM}" STREQUAL "native")
+
+        # Download test vectors
+        set(ZOO_TEST_IFM_SUBPATH    "${ZOO_COMMON_SUBPATH}/testing_input/input_2_int8/0.npy")
+        set(ZOO_TEST_OFM_SUBPATH    "${ZOO_COMMON_SUBPATH}/testing_output/Identity_int8/0.npy")
+
+        set(${use_case}_TEST_IFM    ${MODEL_RESOURCES_DIR}/ifm0.npy CACHE FILEPATH
+                                    "Input test vector for ${use_case}")
+        set(${use_case}_TEST_OFM    ${MODEL_RESOURCES_DIR}/ofm0.npy CACHE FILEPATH
+                                    "Output test vector for ${use_case}")
+
+        download_file_from_modelzoo(${ZOO_TEST_IFM_SUBPATH} ${${use_case}_TEST_IFM})
+        download_file_from_modelzoo(${ZOO_TEST_OFM_SUBPATH} ${${use_case}_TEST_OFM})
+
+        set(TEST_SRC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/src)
+        set(TEST_INC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/include)
+        file(MAKE_DIRECTORY ${TEST_SRC_GEN_DIR} ${TEST_INC_GEN_DIR})
+
+        # Generate test data files to be included in x86 tests
+        generate_test_data_code(
+                            INPUT_DIR "${DOWNLOAD_DEP_DIR}/${use_case}"
+                            DESTINATION_SRC ${TEST_SRC_GEN_DIR}
+                            DESTINATION_HDR ${TEST_INC_GEN_DIR}
+                            USECASE  "${use_case}")
+    endif()
+
+else()
+    set(DEFAULT_MODEL_PATH  "N/A")
+endif()
+
+# Extra C++ expressions passed to generate_tflite_code() below via its
+# EXPRESSIONS argument. Each list element is one declaration.
+# NOTE(review): the elements carry no trailing ';' - presumably the
+# generator appends it; confirm against generate_tflite_code.
+set(EXTRA_MODEL_CODE
+ "/* Model parameters for ${use_case} */"
+ "extern const int g_FrameLength = 512"
+ "extern const int g_FrameStride = 160"
+ "extern const int g_ctxLen = 98"
+ "extern const float g_ScoreThreshold = ${${use_case}_MODEL_SCORE_THRESHOLD}"
+ )
+
+# Model file to use. The default is the downloaded model path, or "N/A"
+# when the option was already defined before this file ran.
+USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the evaluation application. Model files must be in tflite format."
+ ${DEFAULT_MODEL_PATH}
+ FILEPATH
+ )
+
+# Generate model file
+generate_tflite_code(
+ MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
+ DESTINATION ${SRC_GEN_DIR}
+ EXPRESSIONS ${EXTRA_MODEL_CODE}
+ )
diff --git a/source/use_case/img_class/include/MobileNetModel.hpp b/source/use_case/img_class/include/MobileNetModel.hpp
new file mode 100644
index 0000000..f0521ce
--- /dev/null
+++ b/source/use_case/img_class/include/MobileNetModel.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef IMG_CLASS_MOBILENETMODEL_HPP
+#define IMG_CLASS_MOBILENETMODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Model wrapper for the image classification use case. Supplies
+ * the op resolver, the operator list and the model data to the
+ * Model base class through its overridden hooks.
+ */
+ class MobileNetModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input tensor shape */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ static constexpr uint32_t ms_inputChannelsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ /** @brief Gets the pointer to the model data. */
+ const uint8_t* ModelPointer() override;
+
+ /** @brief Gets the model size, as reported by GetModelLen(). */
+ size_t ModelSize() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int _ms_maxOpCnt = 7;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<_ms_maxOpCnt> _m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* IMG_CLASS_MOBILENETMODEL_HPP */ \ No newline at end of file
diff --git a/source/use_case/img_class/include/UseCaseHandler.hpp b/source/use_case/img_class/include/UseCaseHandler.hpp
new file mode 100644
index 0000000..a6cf104
--- /dev/null
+++ b/source/use_case/img_class/include/UseCaseHandler.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef IMG_CLASS_EVT_HANDLER_HPP
+#define IMG_CLASS_EVT_HANDLER_HPP
+
+#include "AppContext.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Handles the inference event.
+ * @param[in] ctx Reference to the application context.
+ * @param[in] imgIndex Index to the image to classify.
+ * @param[in] runAll Flag to request classification of all the available images.
+ * @return true or false based on execution success.
+ **/
+ bool ClassifyImageHandler(ApplicationContext& ctx, uint32_t imgIndex, bool runAll);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* IMG_CLASS_EVT_HANDLER_HPP */ \ No newline at end of file
diff --git a/source/use_case/img_class/src/MainLoop.cc b/source/use_case/img_class/src/MainLoop.cc
new file mode 100644
index 0000000..469907c
--- /dev/null
+++ b/source/use_case/img_class/src/MainLoop.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hal.h" /* Brings in platform definitions. */
+#include "Classifier.hpp" /* Classifier. */
+#include "InputFiles.hpp" /* For input images. */
+#include "Labels.hpp" /* For label strings. */
+#include "MobileNetModel.hpp" /* Model class for running inference. */
+#include "UseCaseHandler.hpp" /* Handlers for different user options. */
+#include "UseCaseCommonUtils.hpp" /* Utils functions. */
+
+using ImgClassClassifier = arm::app::Classifier;
+
+/* Menu options offered by the main loop; the values are the numbers the
+ * user types in. */
+enum opcodes
+{
+    MENU_OPT_RUN_INF_NEXT    = 1,   /* Run on next vector. */
+    MENU_OPT_RUN_INF_CHOSEN  = 2,   /* Run on a user provided vector index. */
+    MENU_OPT_RUN_INF_ALL     = 3,   /* Run inference on all. */
+    MENU_OPT_SHOW_MODEL_INFO = 4,   /* Show model info. */
+    MENU_OPT_LIST_IMAGES     = 5    /* List the current baked images. */
+};
+
+/* Prints the option menu and the input prompt. */
+static void DisplayMenu()
+{
+    /* The enumerators are cast explicitly: the type an unscoped enum is
+     * promoted to when passed through "..." is implementation-defined,
+     * while "%u" requires an unsigned int. */
+    printf("\n\nUser input required\n");
+    printf("Enter option number from:\n\n");
+    printf(" %u. Classify next image\n",
+           static_cast<unsigned int>(MENU_OPT_RUN_INF_NEXT));
+    printf(" %u. Classify image at chosen index\n",
+           static_cast<unsigned int>(MENU_OPT_RUN_INF_CHOSEN));
+    printf(" %u. Run classification on all images\n",
+           static_cast<unsigned int>(MENU_OPT_RUN_INF_ALL));
+    printf(" %u. Show NN model info\n",
+           static_cast<unsigned int>(MENU_OPT_SHOW_MODEL_INFO));
+    printf(" %u. List images\n\n",
+           static_cast<unsigned int>(MENU_OPT_LIST_IMAGES));
+    printf(" Choice: ");
+}
+
+/* Use-case entry point: initialises the model, fills the application
+ * context and dispatches menu choices to the handlers. With a single input
+ * file no menu is shown and one inference is run. */
+void main_loop(hal_platform& platform)
+{
+    /* Model wrapper object; load the model before anything else. */
+    arm::app::MobileNetModel model;
+    if (!model.Init()) {
+        printf_err("Failed to initialise model\n");
+        return;
+    }
+
+    /* Instantiate application context and register the shared objects. */
+    arm::app::ApplicationContext caseContext;
+
+    caseContext.Set<hal_platform&>("platform", platform);
+    caseContext.Set<arm::app::Model&>("model", model);
+    caseContext.Set<uint32_t>("imgIndex", 0);
+
+    /* Classifier wrapper object. */
+    ImgClassClassifier classifier;
+    caseContext.Set<arm::app::Classifier&>("classifier", classifier);
+
+    std::vector <std::string> labels;
+    GetLabelsVector(labels);
+    caseContext.Set<const std::vector <std::string>&>("labels", labels);
+
+    /* The menu is only needed when there is more than one input file. */
+    constexpr bool showMenu = NUMBER_OF_FILES > 1;
+    bool handlerOk = true;
+
+    /* Loop. */
+    do {
+        int choice = MENU_OPT_RUN_INF_NEXT;
+        if (showMenu) {
+            DisplayMenu();
+            choice = arm::app::ReadUserInputAsInt(platform);
+            printf("\n");
+        }
+
+        switch (choice) {
+            case MENU_OPT_RUN_INF_NEXT:
+                handlerOk = ClassifyImageHandler(
+                    caseContext, caseContext.Get<uint32_t>("imgIndex"), false);
+                break;
+
+            case MENU_OPT_RUN_INF_CHOSEN: {
+                printf(" Enter the image index [0, %d]: ", NUMBER_OF_FILES-1);
+                const auto chosenIdx =
+                    static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
+                handlerOk = ClassifyImageHandler(caseContext, chosenIdx, false);
+                break;
+            }
+
+            case MENU_OPT_RUN_INF_ALL:
+                handlerOk = ClassifyImageHandler(
+                    caseContext, caseContext.Get<uint32_t>("imgIndex"), true);
+                break;
+
+            case MENU_OPT_SHOW_MODEL_INFO:
+                handlerOk = model.ShowModelInfoHandler();
+                break;
+
+            case MENU_OPT_LIST_IMAGES:
+                handlerOk = ListFilesHandler(caseContext);
+                break;
+
+            default:
+                printf("Incorrect choice, try again.");
+                break;
+        }
+    } while (handlerOk && showMenu);
+
+    info("Main loop terminated.\n");
+}
diff --git a/source/use_case/img_class/src/MobileNetModel.cc b/source/use_case/img_class/src/MobileNetModel.cc
new file mode 100644
index 0000000..eeaa109
--- /dev/null
+++ b/source/use_case/img_class/src/MobileNetModel.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "MobileNetModel.hpp"
+
+#include "hal.h"
+
+/* Exposes the member op resolver through its interface type. */
+const tflite::MicroOpResolver& arm::app::MobileNetModel::GetOpResolver()
+{
+    const tflite::MicroOpResolver& resolver = this->_m_opResolver;
+    return resolver;
+}
+
+/* Registers every operator the model needs with the op resolver (plus the
+ * Arm NPU operator when ARM_NPU is defined).
+ * Returns false as soon as any registration fails, true otherwise. */
+bool arm::app::MobileNetModel::EnlistOperations()
+{
+    /* Each Add*() call reports a status. Check it here so that a failed
+     * registration is reported at this point rather than silently ignored.
+     * `&&` keeps the original registration order and stops at the first
+     * failure. */
+    const bool opsEnlisted =
+        kTfLiteOk == this->_m_opResolver.AddDepthwiseConv2D() &&
+        kTfLiteOk == this->_m_opResolver.AddConv2D() &&
+        kTfLiteOk == this->_m_opResolver.AddAveragePool2D() &&
+        kTfLiteOk == this->_m_opResolver.AddAdd() &&
+        kTfLiteOk == this->_m_opResolver.AddReshape() &&
+        kTfLiteOk == this->_m_opResolver.AddSoftmax();
+
+    if (!opsEnlisted) {
+        printf_err("Failed to add operators to op resolver.\n");
+        return false;
+    }
+
+#if defined(ARM_NPU)
+    if (kTfLiteOk == this->_m_opResolver.AddEthosU()) {
+        info("Added %s support to op resolver\n",
+            tflite::GetString_ETHOSU());
+    } else {
+        printf_err("Failed to add Arm NPU support to op resolver.");
+        return false;
+    }
+#endif /* ARM_NPU */
+    return true;
+}
+
+/* Defined outside this file. */
+extern uint8_t* GetModelPointer();
+
+/* Returns the model data pointer obtained from GetModelPointer(). */
+const uint8_t* arm::app::MobileNetModel::ModelPointer()
+{
+    const uint8_t* modelData = GetModelPointer();
+    return modelData;
+}
+
+/* Defined outside this file. */
+extern size_t GetModelLen();
+
+/* Returns the model size obtained from GetModelLen(). */
+size_t arm::app::MobileNetModel::ModelSize()
+{
+    const size_t modelLen = GetModelLen();
+    return modelLen;
+}
diff --git a/source/use_case/img_class/src/UseCaseHandler.cc b/source/use_case/img_class/src/UseCaseHandler.cc
new file mode 100644
index 0000000..a412fec
--- /dev/null
+++ b/source/use_case/img_class/src/UseCaseHandler.cc
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "UseCaseHandler.hpp"
+
+#include "Classifier.hpp"
+#include "InputFiles.hpp"
+#include "MobileNetModel.hpp"
+#include "UseCaseCommonUtils.hpp"
+#include "hal.h"
+
+using ImgClassClassifier = arm::app::Classifier;
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Helper function to load the current image into the input
+ * tensor.
+ * @param[in] imIdx Image index (from the pool of images available
+ * to the application).
+ * @param[out] inputTensor Pointer to the input tensor to be populated.
+ * @return true if tensor is loaded, false otherwise.
+ **/
+ static bool _LoadImageIntoTensor(uint32_t imIdx, TfLiteTensor* inputTensor);
+
+ /**
+ * @brief Helper function to increment current image index; the index
+ * wraps around to 0 after the last image.
+ * @param[in,out] ctx Reference to the application context object.
+ **/
+ static void _IncrementAppCtxImageIdx(ApplicationContext& ctx);
+
+ /**
+ * @brief Helper function to set the image index.
+ * @param[in,out] ctx Reference to the application context object.
+ * @param[in] idx Value to be set; must be less than the number
+ * of files.
+ * @return true if index is set, false otherwise.
+ **/
+ static bool _SetAppCtxImageIdx(ApplicationContext& ctx, uint32_t idx);
+
+ /**
+ * @brief Presents inference results using the data presentation
+ * object.
+ * @param[in] platform Reference to the hal platform object.
+ * @param[in] results Vector of classification results to be displayed.
+ * @return true if successful, false otherwise.
+ **/
+ static bool _PresentInferenceResult(hal_platform& platform,
+ const std::vector<ClassificationResult>& results);
+
+ /**
+ * @brief Helper function to convert a UINT8 image to INT8 format.
+ * @param[in,out] data Pointer to the data start.
+ * @param[in] kMaxImageSize Number of bytes to convert, starting at data.
+ **/
+ static void ConvertImgToInt8(void* data, size_t kMaxImageSize);
+
+ /* Image inference classification handler. */
+    /**
+     * Image inference classification handler.
+     *
+     * Loads the selected image into the model's input tensor, shows it,
+     * runs inference, classifies the output, and presents the results.
+     * The results are also stored in the context under "results".
+     *
+     * @param[in,out] ctx       Application context ("platform", "model",
+     *                          "imgIndex", "classifier", "labels").
+     * @param[in]     imgIndex  Image to classify. Applied only when it is
+     *                          less than NUMBER_OF_FILES; otherwise the
+     *                          index already stored in the context is used.
+     * @param[in]     runAll    When true, keeps going until the image index
+     *                          wraps back to the one it started from.
+     * @return  false if the model is not initialised, a tensor or its shape
+     *          is invalid, an image cannot be loaded or the results cannot
+     *          be presented; true otherwise.
+     */
+    bool ClassifyImageHandler(ApplicationContext& ctx, uint32_t imgIndex, bool runAll)
+    {
+        auto& platform = ctx.Get<hal_platform&>("platform");
+
+        constexpr uint32_t dataPsnImgDownscaleFactor = 2;
+        constexpr uint32_t dataPsnImgStartX = 10;
+        constexpr uint32_t dataPsnImgStartY = 35;
+
+        constexpr uint32_t dataPsnTxtInfStartX = 150;
+        constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+        /* The input shape is indexed up to ms_inputChannelsIdx below, so it
+         * needs at least that many + 1 dimensions. */
+        constexpr int minInputDims =
+            static_cast<int>(arm::app::MobileNetModel::ms_inputChannelsIdx) + 1;
+
+        platform.data_psn->clear(COLOR_BLACK);
+
+        auto& model = ctx.Get<Model&>("model");
+
+        /* If the request has a valid size, set the image index. */
+        if (imgIndex < NUMBER_OF_FILES) {
+            if (!_SetAppCtxImageIdx(ctx, imgIndex)) {
+                return false;
+            }
+        }
+        if (!model.IsInited()) {
+            printf_err("Model is not initialised! Terminating processing.\n");
+            return false;
+        }
+
+        auto curImIdx = ctx.Get<uint32_t>("imgIndex");
+
+        TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+        TfLiteTensor* inputTensor = model.GetInputTensor(0);
+
+        if (nullptr == inputTensor || nullptr == outputTensor) {
+            printf_err("Invalid input or output tensor\n");
+            return false;
+        }
+
+        if (!inputTensor->dims) {
+            printf_err("Invalid input tensor dims\n");
+            return false;
+        } else if (inputTensor->dims->size < minInputDims) {
+            printf_err("Input tensor dimension should be >= %d\n", minInputDims);
+            return false;
+        }
+
+        TfLiteIntArray* inputShape = model.GetInputShape(0);
+
+        /* Validate the array that is actually indexed below. */
+        if (nullptr == inputShape || inputShape->size < minInputDims) {
+            printf_err("Invalid input tensor shape\n");
+            return false;
+        }
+
+        const uint32_t nCols = inputShape->data[arm::app::MobileNetModel::ms_inputColsIdx];
+        const uint32_t nRows = inputShape->data[arm::app::MobileNetModel::ms_inputRowsIdx];
+        const uint32_t nChannels = inputShape->data[arm::app::MobileNetModel::ms_inputChannelsIdx];
+
+        std::vector<ClassificationResult> results;
+
+        do {
+            /* Strings for presentation/logging. */
+            std::string str_inf{"Running inference... "};
+
+            /* Copy over the data; do not run inference on stale tensor
+             * contents if the image could not be loaded. */
+            if (!_LoadImageIntoTensor(ctx.Get<uint32_t>("imgIndex"), inputTensor)) {
+                return false;
+            }
+
+            /* Display this image on the LCD. */
+            platform.data_psn->present_data_image(
+                (uint8_t*) inputTensor->data.data,
+                nCols, nRows, nChannels,
+                dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
+
+            /* If the data is signed. */
+            if (model.IsDataSigned()) {
+                ConvertImgToInt8(inputTensor->data.data, inputTensor->bytes);
+            }
+
+            /* Display message on the LCD - inference running. */
+            platform.data_psn->present_data_text(str_inf.c_str(), str_inf.size(),
+                                    dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+            /* Run inference over this image. */
+            info("Running inference on image %u => %s\n", ctx.Get<uint32_t>("imgIndex"),
+                get_filename(ctx.Get<uint32_t>("imgIndex")));
+
+            /* NOTE(review): if RunInference reports a status, it should be
+             * checked here - confirm against its declaration. */
+            RunInference(platform, model);
+
+            /* Erase. */
+            str_inf = std::string(str_inf.size(), ' ');
+            platform.data_psn->present_data_text(str_inf.c_str(), str_inf.size(),
+                                    dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+            auto& classifier = ctx.Get<ImgClassClassifier&>("classifier");
+            classifier.GetClassificationResults(outputTensor, results,
+                                                ctx.Get<std::vector <std::string>&>("labels"),
+                                                5);
+
+            /* Add results to context for access outside handler. */
+            ctx.Set<std::vector<ClassificationResult>>("results", results);
+
+#if VERIFY_TEST_OUTPUT
+            arm::app::DumpTensor(outputTensor);
+#endif /* VERIFY_TEST_OUTPUT */
+
+            if (!_PresentInferenceResult(platform, results)) {
+                return false;
+            }
+
+            _IncrementAppCtxImageIdx(ctx);
+
+        } while (runAll && ctx.Get<uint32_t>("imgIndex") != curImIdx);
+
+        return true;
+    }
+
+    /* Copies image `imIdx` into the input tensor's data. At most
+     * IMAGE_DATA_SIZE bytes are copied, and never more than the tensor holds.
+     * Returns false when no image is available for the index. */
+    static bool _LoadImageIntoTensor(const uint32_t imIdx, TfLiteTensor* inputTensor)
+    {
+        /* Number of bytes to copy: the smaller of the two sizes. */
+        size_t numBytes = IMAGE_DATA_SIZE;
+        if (inputTensor->bytes < IMAGE_DATA_SIZE) {
+            numBytes = inputTensor->bytes;
+        }
+
+        const uint8_t* srcImage = get_img_array(imIdx);
+        if (nullptr != srcImage) {
+            memcpy(inputTensor->data.data, srcImage, numBytes);
+            debug("Image %u loaded\n", imIdx);
+            return true;
+        }
+
+        printf_err("Failed to get image index %u (max: %u)\n", imIdx,
+                   NUMBER_OF_FILES - 1);
+        return false;
+    }
+
+    /* Advances the "imgIndex" entry of the context by one, wrapping back to
+     * 0 once the end of the file list is reached. */
+    static void _IncrementAppCtxImageIdx(ApplicationContext& ctx)
+    {
+        uint32_t nextIdx = ctx.Get<uint32_t>("imgIndex") + 1;
+
+        if (nextIdx >= NUMBER_OF_FILES) {
+            nextIdx = 0;
+        }
+
+        ctx.Set<uint32_t>("imgIndex", nextIdx);
+    }
+
+    /* Stores `idx` as the "imgIndex" entry of the context when it addresses
+     * an existing file; otherwise logs an error and leaves the context as is.
+     * Returns true when the index was stored. */
+    static bool _SetAppCtxImageIdx(ApplicationContext& ctx, const uint32_t idx)
+    {
+        if (idx < NUMBER_OF_FILES) {
+            ctx.Set<uint32_t>("imgIndex", idx);
+            return true;
+        }
+
+        printf_err("Invalid idx %u (expected less than %u)\n",
+                   idx, NUMBER_OF_FILES);
+        return false;
+    }
+
+    /* Shows every classification result twice on the display - once as
+     * "rank) label index (score)" and once as "rank) label" - and logs it.
+     * Always returns true. */
+    static bool _PresentInferenceResult(hal_platform& platform,
+        const std::vector<ClassificationResult>& results)
+    {
+        constexpr uint32_t dataPsnTxtStartX1 = 150;
+        constexpr uint32_t dataPsnTxtStartY1 = 30;
+
+        constexpr uint32_t dataPsnTxtStartX2 = 10;
+        constexpr uint32_t dataPsnTxtStartY2 = 150;
+
+        constexpr uint32_t dataPsnTxtYIncr = 16;  /* Row index increment. */
+
+        /* First row of each column; the score column starts two rows down. */
+        constexpr uint32_t firstScoreRow = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
+        constexpr uint32_t firstLabelRow = dataPsnTxtStartY2;
+
+        platform.data_psn->set_text_color(COLOR_GREEN);
+
+        /* Display each result. */
+        for (uint32_t i = 0; i < results.size(); ++i) {
+            const ClassificationResult& result = results[i];
+            const std::string rank = std::to_string(i + 1) + ") ";
+            const uint32_t rowOffset = i * dataPsnTxtYIncr;
+
+            const std::string scoreLine =
+                rank + std::to_string(result.m_labelIdx) +
+                " (" + std::to_string(result.m_normalisedVal) + ")";
+            platform.data_psn->present_data_text(
+                scoreLine.c_str(), scoreLine.size(),
+                dataPsnTxtStartX1, firstScoreRow + rowOffset, 0);
+
+            const std::string labelLine = rank + result.m_label;
+            platform.data_psn->present_data_text(
+                labelLine.c_str(), labelLine.size(),
+                dataPsnTxtStartX2, firstLabelRow + rowOffset, 0);
+
+            info("%u) %u (%f) -> %s\n", i, result.m_labelIdx,
+                 result.m_normalisedVal, result.m_label.c_str());
+        }
+
+        return true;
+    }
+
+    /* Converts `kMaxImageSize` bytes in place from unsigned to signed 8-bit
+     * by subtracting 128 from each value. */
+    static void ConvertImgToInt8(void* data, const size_t kMaxImageSize)
+    {
+        /* Two views of the same buffer: read unsigned, write signed. */
+        auto* const unsignedPixels = static_cast<uint8_t*>(data);
+        auto* const signedPixels = static_cast<int8_t*>(data);
+
+        for (size_t px = 0; px != kMaxImageSize; ++px) {
+            const int32_t shifted = static_cast<int32_t>(unsignedPixels[px]) - 128;
+            signedPixels[px] = static_cast<int8_t>(shifted);
+        }
+    }
+
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/img_class/usecase.cmake b/source/use_case/img_class/usecase.cmake
new file mode 100644
index 0000000..440eabe
--- /dev/null
+++ b/source/use_case/img_class/usecase.cmake
@@ -0,0 +1,125 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+
+# Work out whether ${use_case}_FILE_PATH, when supplied by the user, refers to
+# a directory (PATH) or to a single file (FILEPATH). The outcome is stored in
+# PATH_TYPE and is used as the type of the ${use_case}_FILE_PATH option.
+if (DEFINED ${use_case}_FILE_PATH)
+    # Quoted so that an empty value is still passed as exactly one argument.
+    get_path_type("${${use_case}_FILE_PATH}" PATH_TYPE)
+
+    # Anything that is neither a directory nor a file path is a configuration
+    # error. Both operands are quoted so that an empty PATH_TYPE still yields
+    # a well-formed condition instead of a CMake syntax error.
+    if (NOT "${PATH_TYPE}" STREQUAL "PATH" AND NOT "${PATH_TYPE}" STREQUAL "FILEPATH")
+        message(FATAL_ERROR "Invalid ${use_case}_FILE_PATH. It should be a dir or file path.")
+    endif()
+else()
+    # Default is a directory path
+    set(PATH_TYPE PATH)
+endif()
+
+message(STATUS "${use_case}_FILE_PATH is of type: ${PATH_TYPE}")
+
+# Input images: a directory of images or a single image file, depending on
+# the PATH_TYPE determined above.
+USER_OPTION(
+    ${use_case}_FILE_PATH
+    "Directory with custom image files to use, or path to a single image, in the evaluation application"
+    ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
+    ${PATH_TYPE}
+)
+
+# Edge length, in pixels, that the input images are resized to.
+USER_OPTION(
+    ${use_case}_IMAGE_SIZE
+    "Square image size in pixels. Images will be resized to this size."
+    224
+    STRING
+)
+
+# Text file with the labels for the chosen model.
+USER_OPTION(
+    ${use_case}_LABELS_TXT_FILE
+    "Labels' txt file for the chosen model"
+    ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/labels_mobilenet_v2_1.0_224.txt
+    FILEPATH
+)
+
+# Generate input files
+generate_images_code(
+    "${${use_case}_FILE_PATH}"
+    ${SRC_GEN_DIR}
+    ${INC_GEN_DIR}
+    "${${use_case}_IMAGE_SIZE}"
+)
+
+# Generate labels file
+set(${use_case}_LABELS_CPP_FILE Labels)
+generate_labels_code(
+    INPUT           "${${use_case}_LABELS_TXT_FILE}"
+    DESTINATION_SRC ${SRC_GEN_DIR}
+    DESTINATION_HDR ${INC_GEN_DIR}
+    OUTPUT_FILENAME "${${use_case}_LABELS_CPP_FILE}"
+)
+
+# Size of the activation buffer for the chosen model.
+USER_OPTION(
+    ${use_case}_ACTIVATION_BUF_SZ
+    "Activation buffer size for the chosen model"
+    0x00200000
+    STRING
+)
+
+# If the user has not pointed to a tflite file, download the default model
+# (and, for native builds, the matching test vectors).
+if (NOT DEFINED ${use_case}_MODEL_TFLITE_PATH)
+
+    set(MODEL_RESOURCES_DIR ${DOWNLOAD_DEP_DIR}/${use_case})
+    file(MAKE_DIRECTORY ${MODEL_RESOURCES_DIR})
+    set(MODEL_FILENAME mobilenet_v2_1.0_224_quantized_1_default_1.tflite)
+    set(DEFAULT_MODEL_PATH ${MODEL_RESOURCES_DIR}/${MODEL_FILENAME})
+
+    # Download the default model
+    set(ZOO_COMMON_SUBPATH "models/image_classification/mobilenet_v2_1.0_224/tflite_uint8")
+    set(ZOO_MODEL_SUBPATH "${ZOO_COMMON_SUBPATH}/${MODEL_FILENAME}")
+
+    download_file_from_modelzoo(${ZOO_MODEL_SUBPATH} ${DEFAULT_MODEL_PATH})
+
+    if (ETHOS_U55_ENABLED)
+        message(STATUS
+            "Ethos-U55 is enabled, but the model downloaded is not optimized by vela. "
+            "To use Ethos-U55 acceleration, optimise the downloaded model and pass it "
+            "as ${use_case}_MODEL_TFLITE_PATH to the CMake configuration.")
+    endif()
+
+    # If the target platform is native. Operands are quoted so that an unset
+    # TARGET_PLATFORM evaluates to false instead of producing a syntax error.
+    if ("${TARGET_PLATFORM}" STREQUAL "native")
+
+        # Download test vectors
+        set(ZOO_TEST_IFM_SUBPATH "${ZOO_COMMON_SUBPATH}/testing_input/input/0.npy")
+        set(ZOO_TEST_OFM_SUBPATH "${ZOO_COMMON_SUBPATH}/testing_output/output/0.npy")
+
+        set(${use_case}_TEST_IFM ${MODEL_RESOURCES_DIR}/ifm0.npy CACHE FILEPATH
+            "Input test vector for ${use_case}")
+        set(${use_case}_TEST_OFM ${MODEL_RESOURCES_DIR}/ofm0.npy CACHE FILEPATH
+            "Output test vector for ${use_case}")
+
+        download_file_from_modelzoo(${ZOO_TEST_IFM_SUBPATH} ${${use_case}_TEST_IFM})
+        download_file_from_modelzoo(${ZOO_TEST_OFM_SUBPATH} ${${use_case}_TEST_OFM})
+
+        set(TEST_SRC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/src)
+        set(TEST_INC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/include)
+        file(MAKE_DIRECTORY ${TEST_SRC_GEN_DIR} ${TEST_INC_GEN_DIR})
+
+        # Generate test data files to be included in x86 tests
+        generate_test_data_code(
+            INPUT_DIR "${DOWNLOAD_DEP_DIR}/${use_case}"
+            DESTINATION_SRC ${TEST_SRC_GEN_DIR}
+            DESTINATION_HDR ${TEST_INC_GEN_DIR}
+            USECASE "${use_case}")
+    endif()
+
+else()
+    # A model path was supplied by the user, so there is no default to offer.
+    set(DEFAULT_MODEL_PATH "N/A")
+endif()
+
+# Path of the tflite model to build into the application; the default value
+# is whatever DEFAULT_MODEL_PATH was set to above.
+USER_OPTION(
+    ${use_case}_MODEL_TFLITE_PATH
+    "NN models file to be used in the evaluation application. Model files must be in tflite format."
+    ${DEFAULT_MODEL_PATH}
+    FILEPATH
+)
+
+# Generate model file
+generate_tflite_code(
+    MODEL_PATH  ${${use_case}_MODEL_TFLITE_PATH}
+    DESTINATION ${SRC_GEN_DIR}
+)
diff --git a/source/use_case/inference_runner/include/TestModel.hpp b/source/use_case/inference_runner/include/TestModel.hpp
new file mode 100644
index 0000000..0b3e9b9
--- /dev/null
+++ b/source/use_case/inference_runner/include/TestModel.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef INF_RUNNER_TESTMODEL_HPP
+#define INF_RUNNER_TESTMODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+
+ /* Model wrapper used by the inference runner; resolves operators through
+ * tflite::AllOpsResolver rather than an explicit per-model operator list. */
+ class TestModel : public Model {
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::AllOpsResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance, not needed as using AllOpsResolver. */
+ /* NOTE(review): this returns false although nothing has failed - confirm
+ * that the base class does not treat the return value as an error. */
+ bool EnlistOperations() override {return false;}
+
+ /** @brief Gets the pointer to the model data. */
+ const uint8_t* ModelPointer() override;
+
+ /** @brief Gets the model size. */
+ size_t ModelSize() override;
+
+ private:
+
+ /* No need to define individual ops at the cost of extra memory. */
+ tflite::AllOpsResolver _m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* INF_RUNNER_TESTMODEL_HPP */ \ No newline at end of file
diff --git a/source/use_case/inference_runner/include/UseCaseHandler.hpp b/source/use_case/inference_runner/include/UseCaseHandler.hpp
new file mode 100644
index 0000000..4962650
--- /dev/null
+++ b/source/use_case/inference_runner/include/UseCaseHandler.hpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef INF_RUNNER_EVT_HANDLER_HPP
+#define INF_RUNNER_EVT_HANDLER_HPP
+
+#include "AppContext.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Handles the inference event.
+ * @param[in] ctx Reference to the application context.
+ * @return true or false based on execution success.
+ **/
+ bool RunInferenceHandler(ApplicationContext& ctx);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* INF_RUNNER_EVT_HANDLER_HPP */ \ No newline at end of file
diff --git a/source/use_case/inference_runner/src/MainLoop.cc b/source/use_case/inference_runner/src/MainLoop.cc
new file mode 100644
index 0000000..b110a24
--- /dev/null
+++ b/source/use_case/inference_runner/src/MainLoop.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hal.h" /* Brings in platform definitions. */
+#include "TestModel.hpp" /* Model class for running inference. */
+#include "UseCaseHandler.hpp" /* Handlers for different user options. */
+#include "UseCaseCommonUtils.hpp" /* Utils functions. */
+
+/* Menu option codes. */
+/* NOTE(review): main_loop in this file does not reference these values -
+ * confirm whether they are still needed. */
+enum opcodes
+{
+ MENU_OPT_RUN_INF_NEXT = 1, /* Run on next vector. */
+ MENU_OPT_SHOW_MODEL_INFO, /* Show model info. */
+};
+
+/* Initialises the test model, fills the application context and runs the
+ * inference handler exactly once, logging whether it succeeded. */
+void main_loop(hal_platform& platform)
+{
+    /* Model wrapper object. */
+    arm::app::TestModel model;
+
+    /* Nothing can be done without a loaded model. */
+    if (!model.Init()) {
+        printf_err("Failed to initialise model\n");
+        return;
+    }
+
+    /* Application context with the entries set up for the handler. */
+    arm::app::ApplicationContext caseContext;
+    caseContext.Set<hal_platform&>("platform", platform);
+    caseContext.Set<arm::app::Model&>("model", model);
+    caseContext.Set<uint32_t>("imgIndex", 0);
+
+    /* Single shot: there is no menu for this use case. */
+    const bool inferenceOk = RunInferenceHandler(caseContext);
+    if (!inferenceOk) {
+        printf_err("Inference failed.\n");
+        return;
+    }
+
+    info("Inference completed.\n");
+}
diff --git a/source/use_case/inference_runner/src/TestModel.cc b/source/use_case/inference_runner/src/TestModel.cc
new file mode 100644
index 0000000..0926a96
--- /dev/null
+++ b/source/use_case/inference_runner/src/TestModel.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "TestModel.hpp"
+
+#include "hal.h"
+
+/* Returns the resolver instance owned by this model object. */
+const tflite::AllOpsResolver& arm::app::TestModel::GetOpResolver()
+{
+    const tflite::AllOpsResolver& resolver = _m_opResolver;
+    return resolver;
+}
+
+/* Defined outside this translation unit. */
+extern uint8_t* GetModelPointer();
+
+/* Forwards to GetModelPointer() and hands the result back as read-only. */
+const uint8_t* arm::app::TestModel::ModelPointer()
+{
+    const uint8_t* modelData = GetModelPointer();
+    return modelData;
+}
+
+/* Defined outside this translation unit. */
+extern size_t GetModelLen();
+
+/* Forwards to GetModelLen(). */
+size_t arm::app::TestModel::ModelSize()
+{
+    const size_t modelLen = GetModelLen();
+    return modelLen;
+} \ No newline at end of file
diff --git a/source/use_case/inference_runner/src/UseCaseHandler.cc b/source/use_case/inference_runner/src/UseCaseHandler.cc
new file mode 100644
index 0000000..ac4ea47
--- /dev/null
+++ b/source/use_case/inference_runner/src/UseCaseHandler.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "UseCaseHandler.hpp"
+
+#include "TestModel.hpp"
+#include "UseCaseCommonUtils.hpp"
+#include "hal.h"
+
+#include <cstdlib>
+
+namespace arm {
+namespace app {
+
+    /**
+     * @brief       Runs one inference with every input tensor filled with
+     *              pseudo-random bytes, showing a progress message on the
+     *              display while the inference is in flight.
+     * @param[in]   ctx   Reference to the application context; the "platform"
+     *                    and "model" entries are read from it.
+     * @return      true if the inference ran successfully; false if the model
+     *              is not initialised, an input tensor is unavailable, or the
+     *              inference itself reports failure.
+     **/
+    bool RunInferenceHandler(ApplicationContext& ctx)
+    {
+        auto& platform = ctx.Get<hal_platform&>("platform");
+        auto& model = ctx.Get<Model&>("model");
+
+        constexpr uint32_t dataPsnTxtInfStartX = 150;
+        constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+        if (!model.IsInited()) {
+            printf_err("Model is not initialised! Terminating processing.\n");
+            return false;
+        }
+
+        const size_t numInputs = model.GetNumInputs();
+
+        /* Populate each input tensor with random data. */
+        for (size_t inputIndex = 0; inputIndex < numInputs; inputIndex++) {
+
+            TfLiteTensor* inputTensor = model.GetInputTensor(inputIndex);
+
+            /* Guard the dereferences below against a missing tensor. */
+            if (inputTensor == nullptr) {
+                printf_err("Input tensor %zu is not available.\n", inputIndex);
+                return false;
+            }
+
+            debug("Populating input tensor %zu@%p\n", inputIndex, inputTensor);
+            debug("Total input size to be populated: %zu\n", inputTensor->bytes);
+
+            /* Create a random input. */
+            if (inputTensor->bytes > 0) {
+
+                uint8_t* tData = tflite::GetTensorData<uint8_t>(inputTensor);
+
+                for (size_t j = 0; j < inputTensor->bytes; ++j) {
+                    tData[j] = static_cast<uint8_t>(std::rand() & 0xFF);
+                }
+            }
+        }
+
+        /* Strings for presentation/logging. */
+        std::string str_inf{"Running inference... "};
+
+        /* Display message on the LCD - inference running. */
+        platform.data_psn->present_data_text(
+                        str_inf.c_str(), str_inf.size(),
+                        dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+        /* Keep the outcome so that a failed inference is not reported as success. */
+        const bool inferenceOk = RunInference(platform, model);
+
+        /* Erase the progress message whether or not the inference succeeded. */
+        str_inf = std::string(str_inf.size(), ' ');
+        platform.data_psn->present_data_text(
+                        str_inf.c_str(), str_inf.size(),
+                        dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+        /* The caller logs the failure, so no message is printed here. */
+        if (!inferenceOk) {
+            return false;
+        }
+
+#if VERIFY_TEST_OUTPUT
+        for (size_t outputIndex = 0; outputIndex < model.GetNumOutputs(); outputIndex++) {
+            arm::app::DumpTensor(model.GetOutputTensor(outputIndex));
+        }
+#endif /* VERIFY_TEST_OUTPUT */
+
+        return true;
+    }
+
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/inference_runner/usecase.cmake b/source/use_case/inference_runner/usecase.cmake
new file mode 100644
index 0000000..77b1ae1
--- /dev/null
+++ b/source/use_case/inference_runner/usecase.cmake
@@ -0,0 +1,57 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+
+# Size of the activation buffer for the chosen model.
+USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
+    0x00200000
+    STRING)
+
+generate_default_input_code(${INC_GEN_DIR})
+
+# If the user has not pointed to a tflite file, download the default model.
+if (NOT DEFINED ${use_case}_MODEL_TFLITE_PATH)
+
+    set(MODEL_RESOURCES_DIR ${DOWNLOAD_DEP_DIR}/${use_case})
+    file(MAKE_DIRECTORY ${MODEL_RESOURCES_DIR})
+    set(MODEL_FILENAME dnn_s_quantized.tflite)
+    set(DEFAULT_MODEL_PATH ${MODEL_RESOURCES_DIR}/${MODEL_FILENAME})
+
+    # Download the default model.
+    # No trailing slash here: the separator is added when the file name is
+    # appended below, and a second one would put "//" into the sub-path.
+    set(ZOO_COMMON_SUBPATH "models/keyword_spotting/dnn_small/tflite_int8")
+    set(ZOO_MODEL_SUBPATH "${ZOO_COMMON_SUBPATH}/${MODEL_FILENAME}")
+
+    download_file_from_modelzoo(${ZOO_MODEL_SUBPATH} ${DEFAULT_MODEL_PATH})
+
+    if (ETHOS_U55_ENABLED)
+        message(STATUS
+            "Ethos-U55 is enabled, but the model downloaded is not optimized by vela. "
+            "To use Ethos-U55 acceleration, optimise the downloaded model and pass it "
+            "as ${use_case}_MODEL_TFLITE_PATH to the CMake configuration.")
+    endif()
+
+else()
+    # A model path was supplied by the user, so there is no default to offer.
+    set(DEFAULT_MODEL_PATH "N/A")
+endif()
+
+USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the evaluation application. Model files must be in tflite format."
+    ${DEFAULT_MODEL_PATH}
+    FILEPATH)
+
+# Generate model file
+generate_tflite_code(
+    MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
+    DESTINATION ${SRC_GEN_DIR}
+)
diff --git a/source/use_case/kws/include/DsCnnMfcc.hpp b/source/use_case/kws/include/DsCnnMfcc.hpp
new file mode 100644
index 0000000..3f681af
--- /dev/null
+++ b/source/use_case/kws/include/DsCnnMfcc.hpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_DSCNN_MFCC_HPP
+#define KWS_DSCNN_MFCC_HPP
+
+#include "Mfcc.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+    /* MFCC specialisation that pins the parameters used for DS-CNN; only the
+     * number of features and the frame length are chosen by the caller. */
+    class DsCnnMFCC : public MFCC {
+
+    public:
+        /* Fixed values forwarded to MfccParams by the constructor below. */
+        static constexpr uint32_t  ms_defaultSamplingFreq = 16000;
+        static constexpr uint32_t  ms_defaultNumFbankBins =    40;
+        static constexpr uint32_t  ms_defaultMelLoFreq    =    20;
+        static constexpr uint32_t  ms_defaultMelHiFreq    =  4000;
+        static constexpr bool      ms_defaultUseHtkMethod =  true;
+
+        /* numFeats and frameLen must always be supplied. */
+        DsCnnMFCC() = delete;
+
+        explicit DsCnnMFCC(const size_t numFeats, const size_t frameLen)
+            : MFCC(MfccParams(
+                        ms_defaultSamplingFreq,
+                        ms_defaultNumFbankBins,
+                        ms_defaultMelLoFreq,
+                        ms_defaultMelHiFreq,
+                        numFeats,
+                        frameLen,
+                        ms_defaultUseHtkMethod))
+        {}
+
+        ~DsCnnMFCC() = default;
+    };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_DSCNN_MFCC_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws/include/DsCnnModel.hpp b/source/use_case/kws/include/DsCnnModel.hpp
new file mode 100644
index 0000000..a4e7110
--- /dev/null
+++ b/source/use_case/kws/include/DsCnnModel.hpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_DSCNNMODEL_HPP
+#define KWS_DSCNNMODEL_HPP
+
+#include "Model.hpp"
+
+/* Constants defined outside this header. */
+/* NOTE(review): presumably emitted into the generated sources by the build -
+ * confirm where these are defined. */
+extern const int g_FrameLength;
+extern const int g_FrameStride;
+extern const float g_ScoreThreshold;
+
+namespace arm {
+namespace app {
+
+ /* Model wrapper that resolves operators through a mutable op resolver
+ * holding an explicit operator list. */
+ class DsCnnModel : public Model {
+ public:
+ /* Indices for the expected model - based on input and output tensor shapes */
+ static constexpr uint32_t ms_inputRowsIdx = 2;
+ static constexpr uint32_t ms_inputColsIdx = 3;
+ static constexpr uint32_t ms_outputRowsIdx = 2;
+ static constexpr uint32_t ms_outputColsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ /** @brief Gets the pointer to the model data. */
+ const uint8_t* ModelPointer() override;
+
+ /** @brief Gets the model size. */
+ size_t ModelSize() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ /* Must be at least the number of operators added by EnlistOperations(). */
+ static constexpr int _ms_maxOpCnt = 8;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<_ms_maxOpCnt> _m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_DSCNNMODEL_HPP */
diff --git a/source/use_case/kws/include/KwsResult.hpp b/source/use_case/kws/include/KwsResult.hpp
new file mode 100644
index 0000000..5a26ce1
--- /dev/null
+++ b/source/use_case/kws/include/KwsResult.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_RESULT_HPP
+#define KWS_RESULT_HPP
+
+#include "ClassificationResult.hpp"
+
+#include <vector>
+
+namespace arm {
+namespace app {
+namespace kws {
+
+ using ResultVec = std::vector < arm::app::ClassificationResult >;
+
+    /* Structure for holding kws result. */
+    class KwsResult {
+
+    public:
+        ResultVec   m_resultVec;        /* Container for "thresholded" classification results. */
+        float       m_timeStamp;        /* Audio timestamp for this result. */
+        uint32_t    m_inferenceNumber;  /* Corresponding inference number. */
+        float       m_threshold;        /* Threshold value for `m_resultVec`. */
+
+        KwsResult() = delete;
+
+        /**
+         * @brief       Stores the metadata and keeps a copy of every entry of
+         *              resultVec whose normalised value is greater than or
+         *              equal to scoreThreshold.
+         * @param[in]   resultVec        Classification results to filter; not modified.
+         * @param[in]   timestamp        Audio timestamp for this result.
+         * @param[in]   inferenceIdx     Corresponding inference number.
+         * @param[in]   scoreThreshold   Minimum normalised value to be retained.
+         **/
+        KwsResult(const ResultVec& resultVec,
+                  const float timestamp,
+                  const uint32_t inferenceIdx,
+                  const float scoreThreshold)
+            : m_resultVec(),
+              m_timeStamp(timestamp),
+              m_inferenceNumber(inferenceIdx),
+              m_threshold(scoreThreshold)
+        {
+            /* Entries keep the order they have in the input vector. */
+            for (const auto& result : resultVec) {
+                if (result.m_normalisedVal >= this->m_threshold) {
+                    this->m_resultVec.emplace_back(result);
+                }
+            }
+        }
+
+        ~KwsResult() = default;
+    };
+
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws/include/UseCaseHandler.hpp b/source/use_case/kws/include/UseCaseHandler.hpp
new file mode 100644
index 0000000..1eb742f
--- /dev/null
+++ b/source/use_case/kws/include/UseCaseHandler.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_EVT_HANDLER_HPP
+#define KWS_EVT_HANDLER_HPP
+
+#include "AppContext.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Handles the inference event.
+ * @param[in] ctx Reference to the application context.
+ * @param[in] clipIndex Index to the audio clip to classify.
+ * @param[in] runAll Flag to request classification of all the available audio clips.
+ * @return true or false based on execution success.
+ **/
+ bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_EVT_HANDLER_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws/src/DsCnnModel.cc b/source/use_case/kws/src/DsCnnModel.cc
new file mode 100644
index 0000000..a093eb4
--- /dev/null
+++ b/source/use_case/kws/src/DsCnnModel.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "DsCnnModel.hpp"
+
+#include "hal.h"
+
+/* Returns the mutable resolver owned by this model through its base interface. */
+const tflite::MicroOpResolver& arm::app::DsCnnModel::GetOpResolver()
+{
+    const tflite::MicroOpResolver& resolver = _m_opResolver;
+    return resolver;
+}
+
+/* Registers the operators this model uses with the op resolver. When ARM_NPU
+ * is defined the Ethos-U operator is registered as well, and a failure to
+ * do so makes the function return false. Returns true otherwise. */
+bool arm::app::DsCnnModel::EnlistOperations()
+{
+    this->_m_opResolver.AddReshape();
+    this->_m_opResolver.AddAveragePool2D();
+    this->_m_opResolver.AddConv2D();
+    this->_m_opResolver.AddDepthwiseConv2D();
+    this->_m_opResolver.AddFullyConnected();
+    this->_m_opResolver.AddRelu();
+    this->_m_opResolver.AddSoftmax();
+
+#if defined(ARM_NPU)
+    if (kTfLiteOk == this->_m_opResolver.AddEthosU()) {
+        info("Added %s support to op resolver\n",
+            tflite::GetString_ETHOSU());
+    } else {
+        /* Newline-terminated, like the other error messages in the project. */
+        printf_err("Failed to add Arm NPU support to op resolver.\n");
+        return false;
+    }
+#endif /* ARM_NPU */
+    return true;
+}
+
+/* Defined outside this translation unit. */
+extern uint8_t* GetModelPointer();
+
+/* Forwards to GetModelPointer() and hands the result back as read-only. */
+const uint8_t* arm::app::DsCnnModel::ModelPointer()
+{
+    const uint8_t* modelData = GetModelPointer();
+    return modelData;
+}
+
+/* Defined outside this translation unit. */
+extern size_t GetModelLen();
+
+/* Forwards to GetModelLen(). */
+size_t arm::app::DsCnnModel::ModelSize()
+{
+    const size_t modelLen = GetModelLen();
+    return modelLen;
+} \ No newline at end of file
diff --git a/source/use_case/kws/src/MainLoop.cc b/source/use_case/kws/src/MainLoop.cc
new file mode 100644
index 0000000..24cb939
--- /dev/null
+++ b/source/use_case/kws/src/MainLoop.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "InputFiles.hpp" /* For input audio clips. */
+#include "Classifier.hpp" /* Classifier. */
+#include "DsCnnModel.hpp" /* Model class for running inference. */
+#include "hal.h" /* Brings in platform definitions. */
+#include "Labels.hpp" /* For label strings. */
+#include "UseCaseHandler.hpp" /* Handlers for different user options. */
+#include "UseCaseCommonUtils.hpp" /* Utils functions. */
+
+using KwsClassifier = arm::app::Classifier;
+
+/* Menu option codes: printed by DisplayMenu() and dispatched on in
+ * main_loop(). Numbering starts at 1. */
+enum opcodes
+{
+ MENU_OPT_RUN_INF_NEXT = 1, /* Run on next vector. */
+ MENU_OPT_RUN_INF_CHOSEN, /* Run on a user provided vector index. */
+ MENU_OPT_RUN_INF_ALL, /* Run inference on all. */
+ MENU_OPT_SHOW_MODEL_INFO, /* Show model info. */
+ MENU_OPT_LIST_AUDIO_CLIPS /* List the current baked audio clips. */
+};
+
+/* Prints the option menu, numbering each entry with its opcode, followed
+ * by the input prompt. */
+static void DisplayMenu()
+{
+    /* One call with adjacent string literals; the text emitted is unchanged. */
+    printf("\n\nUser input required\n"
+           "Enter option number from:\n\n"
+           " %u. Classify next audio clip\n"
+           " %u. Classify audio clip at chosen index\n"
+           " %u. Run classification on all audio clips\n"
+           " %u. Show NN model info\n"
+           " %u. List audio clips\n\n"
+           " Choice: ",
+           MENU_OPT_RUN_INF_NEXT,
+           MENU_OPT_RUN_INF_CHOSEN,
+           MENU_OPT_RUN_INF_ALL,
+           MENU_OPT_SHOW_MODEL_INFO,
+           MENU_OPT_LIST_AUDIO_CLIPS);
+}
+
+/* Initialises the DS-CNN model, fills the application context and then
+ * serves menu requests until a handler reports failure. When there is at
+ * most one input file the menu is skipped and one inference is run. */
+void main_loop(hal_platform& platform)
+{
+    /* Model wrapper object. */
+    arm::app::DsCnnModel model;
+
+    /* Nothing can be done without a loaded model. */
+    if (!model.Init()) {
+        printf_err("Failed to initialise model\n");
+        return;
+    }
+
+    /* Application context shared with the handlers. */
+    arm::app::ApplicationContext caseContext;
+
+    caseContext.Set<hal_platform&>("platform", platform);
+    caseContext.Set<arm::app::Model&>("model", model);
+    caseContext.Set<uint32_t>("clipIndex", 0);
+    caseContext.Set<int>("frameLength", g_FrameLength);
+    caseContext.Set<int>("frameStride", g_FrameStride);
+    /* Normalised score threshold. */
+    caseContext.Set<float>("scoreThreshold", g_ScoreThreshold);
+
+    /* Classifier wrapper object. */
+    KwsClassifier classifier;
+    caseContext.Set<arm::app::Classifier&>("classifier", classifier);
+
+    std::vector <std::string> labels;
+    GetLabelsVector(labels);
+
+    caseContext.Set<const std::vector <std::string>&>("labels", labels);
+
+    bool keepRunning = true;
+
+    /* The menu is only worth showing when there is a choice of clips. */
+    constexpr bool bUseMenu = (NUMBER_OF_FILES > 1);
+
+    do {
+        /* Without a menu the only action is "run on the next clip". */
+        int choice = MENU_OPT_RUN_INF_NEXT;
+        if (bUseMenu) {
+            DisplayMenu();
+            choice = arm::app::ReadUserInputAsInt(platform);
+            printf("\n");
+        }
+
+        switch (choice) {
+            case MENU_OPT_RUN_INF_NEXT: {
+                keepRunning = ClassifyAudioHandler(
+                    caseContext, caseContext.Get<uint32_t>("clipIndex"), false);
+                break;
+            }
+            case MENU_OPT_RUN_INF_CHOSEN: {
+                printf(" Enter the audio clip index [0, %d]: ", NUMBER_OF_FILES-1);
+                const auto requestedIdx =
+                    static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
+                keepRunning = ClassifyAudioHandler(caseContext, requestedIdx, false);
+                break;
+            }
+            case MENU_OPT_RUN_INF_ALL: {
+                keepRunning = ClassifyAudioHandler(
+                    caseContext, caseContext.Get<uint32_t>("clipIndex"), true);
+                break;
+            }
+            case MENU_OPT_SHOW_MODEL_INFO: {
+                keepRunning = model.ShowModelInfoHandler();
+                break;
+            }
+            case MENU_OPT_LIST_AUDIO_CLIPS: {
+                keepRunning = ListFilesHandler(caseContext);
+                break;
+            }
+            default: {
+                /* An unknown option leaves keepRunning untouched. */
+                printf("Incorrect choice, try again.");
+                break;
+            }
+        }
+    } while (keepRunning && bUseMenu);
+
+    info("Main loop terminated.\n");
+} \ No newline at end of file
diff --git a/source/use_case/kws/src/UseCaseHandler.cc b/source/use_case/kws/src/UseCaseHandler.cc
new file mode 100644
index 0000000..872d323
--- /dev/null
+++ b/source/use_case/kws/src/UseCaseHandler.cc
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "UseCaseHandler.hpp"
+
+#include "InputFiles.hpp"
+#include "Classifier.hpp"
+#include "DsCnnModel.hpp"
+#include "hal.h"
+#include "DsCnnMfcc.hpp"
+#include "AudioUtils.hpp"
+#include "UseCaseCommonUtils.hpp"
+#include "KwsResult.hpp"
+
+#include <vector>
+#include <functional>
+
+using KwsClassifier = arm::app::Classifier;
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Helper function to increment current audio clip index.
+ * The index wraps around to 0 once the last clip has been reached.
+ * @param[in,out] ctx Reference to the application context object.
+ **/
+ static void _IncrementAppCtxClipIdx(ApplicationContext& ctx);
+
+ /**
+ * @brief Helper function to set the audio clip index.
+ * @param[in,out] ctx Reference to the application context object.
+ * @param[in] idx Value to be set; must be less than NUMBER_OF_FILES.
+ * @return true if index is set, false otherwise.
+ **/
+ static bool _SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx);
+
+ /**
+ * @brief Presents inference results using the data presentation
+ * object.
+ * @param[in] platform Reference to the hal platform object.
+ * @param[in] results Vector of classification results to be displayed.
+ * @return true if successful, false otherwise.
+ **/
+ static bool _PresentInferenceResult(hal_platform& platform,
+ const std::vector<arm::app::kws::KwsResult>& results);
+
+ /**
+ * @brief Returns a function to perform feature calculation and populates input tensor data with
+ * MFCC data.
+ *
+ * Input tensor data type check is performed to choose correct MFCC feature data type.
+ * If tensor has an integer data type then original features are quantised.
+ *
+ * Warning: MFCC calculator provided as input must have the same life scope as returned function.
+ *
+ * @param[in] mfcc MFCC feature calculator.
+ * @param[in,out] inputTensor Input tensor pointer to store calculated features.
+ * @param[in] cacheSize Size of the feature vectors cache (number of feature vectors).
+ * @return Function to be called providing, in order: the audio samples of one MFCC
+ * window, the index of that window within the inference window, a flag
+ * telling whether cached features may be reused, and the index from which
+ * computed features start to be written back to the cache.
+ * An empty function is returned if the input tensor type is not
+ * supported, so callers must test the result before invoking it.
+ */
+ static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+ GetFeatureCalculator(audio::DsCnnMFCC& mfcc,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize);
+
+    /**
+     * @brief Audio inference handler: runs keyword spotting over the selected
+     *        audio clip (or over every clip) and presents the results.
+     *
+     * The clip is walked with a sliding window. For each window, MFCC features
+     * are written into the model input tensor, inference is run, and the top
+     * classification is stored as a KwsResult. Results are also stored in the
+     * context under "results".
+     *
+     * @param[in,out] ctx        Application context; its "clipIndex" entry is
+     *                           updated as clips are processed.
+     * @param[in]     clipIndex  Clip to process. A value that is not less than
+     *                           NUMBER_OF_FILES is ignored and the index already
+     *                           held by the context is used instead.
+     * @param[in]     runAll     If true, successive clips are processed until
+     *                           the index wraps back to the starting clip.
+     * @return false if the clip index cannot be set, the model is not
+     *         initialised, the input tensor is unsuitable, no feature
+     *         calculator is available for the tensor type, or the results
+     *         cannot be presented; true otherwise.
+     **/
+    bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
+    {
+        auto& platform = ctx.Get<hal_platform&>("platform");
+
+        constexpr uint32_t dataPsnTxtInfStartX = 20;
+        constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+        /* The rows and columns indices are both used to index the input shape
+         * below, so the tensor must have strictly more dimensions than the
+         * larger of the two indices. */
+        constexpr int minTensorDims = 1 + static_cast<int>(
+            (arm::app::DsCnnModel::ms_inputRowsIdx > arm::app::DsCnnModel::ms_inputColsIdx)?
+             arm::app::DsCnnModel::ms_inputRowsIdx : arm::app::DsCnnModel::ms_inputColsIdx);
+
+        platform.data_psn->clear(COLOR_BLACK);
+
+        auto& model = ctx.Get<Model&>("model");
+
+        /* If the request has a valid size, set the audio index. */
+        if (clipIndex < NUMBER_OF_FILES) {
+            if (!_SetAppCtxClipIdx(ctx, clipIndex)) {
+                return false;
+            }
+        }
+        if (!model.IsInited()) {
+            printf_err("Model is not initialised! Terminating processing.\n");
+            return false;
+        }
+
+        const auto frameLength = ctx.Get<int>("frameLength");
+        const auto frameStride = ctx.Get<int>("frameStride");
+        const auto scoreThreshold = ctx.Get<float>("scoreThreshold");
+        auto startClipIdx = ctx.Get<uint32_t>("clipIndex");
+
+        TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+        TfLiteTensor* inputTensor = model.GetInputTensor(0);
+
+        /* Guard against a missing tensor before looking at its dimensions. */
+        if (!inputTensor || !inputTensor->dims) {
+            printf_err("Invalid input tensor dims\n");
+            return false;
+        } else if (inputTensor->dims->size < minTensorDims) {
+            printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
+            return false;
+        }
+
+        TfLiteIntArray* inputShape = model.GetInputShape(0);
+        const uint32_t kNumCols = inputShape->data[arm::app::DsCnnModel::ms_inputColsIdx];
+        const uint32_t kNumRows = inputShape->data[arm::app::DsCnnModel::ms_inputRowsIdx];
+
+        audio::DsCnnMFCC mfcc = audio::DsCnnMFCC(kNumCols, frameLength);
+        mfcc.Init();
+
+        /* Deduce the data length required for 1 inference from the network parameters. */
+        auto audioDataWindowSize = kNumRows * frameStride + (frameLength - frameStride);
+        auto mfccWindowSize = frameLength;
+        auto mfccWindowStride = frameStride;
+
+        /* We choose to move by half the window size => for a 1 second window size
+         * there is an overlap of 0.5 seconds. */
+        auto audioDataStride = audioDataWindowSize / 2;
+
+        /* To have the previously calculated features re-usable, stride must be multiple
+         * of MFCC features window stride. */
+        if (0 != audioDataStride % mfccWindowStride) {
+
+            /* Reduce the stride. */
+            audioDataStride -= audioDataStride % mfccWindowStride;
+        }
+
+        auto nMfccVectorsInAudioStride = audioDataStride/mfccWindowStride;
+
+        /* We expect to be sampling 1 second worth of data at a time.
+         * NOTE: This is only used for time stamp calculation. */
+        const float secondsPerSample = 1.0/audio::DsCnnMFCC::ms_defaultSamplingFreq;
+
+        do {
+            auto currentIndex = ctx.Get<uint32_t>("clipIndex");
+
+            /* Creating a mfcc features sliding window for the data required for 1 inference. */
+            auto audioMFCCWindowSlider = audio::SlidingWindow<const int16_t>(
+                                            get_audio_array(currentIndex),
+                                            audioDataWindowSize, mfccWindowSize,
+                                            mfccWindowStride);
+
+            /* Creating a sliding window through the whole audio clip. */
+            auto audioDataSlider = audio::SlidingWindow<const int16_t>(
+                                        get_audio_array(currentIndex),
+                                        get_audio_array_size(currentIndex),
+                                        audioDataWindowSize, audioDataStride);
+
+            /* Calculate number of the feature vectors in the window overlap region.
+             * These feature vectors will be reused.*/
+            auto numberOfReusedFeatureVectors = audioMFCCWindowSlider.TotalStrides() + 1
+                                                - nMfccVectorsInAudioStride;
+
+            /* Construct feature calculation function. */
+            auto mfccFeatureCalc = GetFeatureCalculator(mfcc, inputTensor,
+                                                        numberOfReusedFeatureVectors);
+
+            /* An empty function means the input tensor type is unsupported. */
+            if (!mfccFeatureCalc){
+                return false;
+            }
+
+            /* Declare a container for results. */
+            std::vector<arm::app::kws::KwsResult> results;
+
+            /* Display message on the LCD - inference running. */
+            std::string str_inf{"Running inference... "};
+            platform.data_psn->present_data_text(
+                                str_inf.c_str(), str_inf.size(),
+                                dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+            info("Running inference on audio clip %u => %s\n", currentIndex,
+                 get_filename(currentIndex));
+
+            /* Start sliding through audio clip. */
+            while (audioDataSlider.HasNext()) {
+                const int16_t *inferenceWindow = audioDataSlider.Next();
+
+                /* We moved to the next window - set the features sliding to the new address. */
+                audioMFCCWindowSlider.Reset(inferenceWindow);
+
+                /* The first window does not have cache ready. */
+                bool useCache = audioDataSlider.Index() > 0 && numberOfReusedFeatureVectors > 0;
+
+                /* Start calculating features inside one audio sliding window. */
+                while (audioMFCCWindowSlider.HasNext()) {
+                    const int16_t *mfccWindow = audioMFCCWindowSlider.Next();
+                    std::vector<int16_t> mfccAudioData = std::vector<int16_t>(mfccWindow,
+                                                            mfccWindow + mfccWindowSize);
+                    /* Compute features for this window and write them to input tensor. */
+                    mfccFeatureCalc(mfccAudioData,
+                                    audioMFCCWindowSlider.Index(),
+                                    useCache,
+                                    nMfccVectorsInAudioStride);
+                }
+
+                info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
+                     audioDataSlider.TotalStrides() + 1);
+
+                /* Run inference over this audio clip sliding window. */
+                arm::app::RunInference(platform, model);
+
+                std::vector<ClassificationResult> classificationResult;
+                auto& classifier = ctx.Get<KwsClassifier&>("classifier");
+                classifier.GetClassificationResults(outputTensor, classificationResult,
+                                                    ctx.Get<std::vector<std::string>&>("labels"), 1);
+
+                results.emplace_back(kws::KwsResult(classificationResult,
+                    audioDataSlider.Index() * secondsPerSample * audioDataStride,
+                    audioDataSlider.Index(), scoreThreshold));
+
+#if VERIFY_TEST_OUTPUT
+                arm::app::DumpTensor(outputTensor);
+#endif /* VERIFY_TEST_OUTPUT */
+            } /* while (audioDataSlider.HasNext()) */
+
+            /* Erase. */
+            str_inf = std::string(str_inf.size(), ' ');
+            platform.data_psn->present_data_text(
+                                str_inf.c_str(), str_inf.size(),
+                                dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
+
+            ctx.Set<std::vector<arm::app::kws::KwsResult>>("results", results);
+
+            if (!_PresentInferenceResult(platform, results)) {
+                return false;
+            }
+
+            _IncrementAppCtxClipIdx(ctx);
+
+        } while (runAll && ctx.Get<uint32_t>("clipIndex") != startClipIdx);
+
+        return true;
+    }
+
+    /* Advances the "clipIndex" context entry by one, wrapping round to 0
+     * when the incremented value is not less than NUMBER_OF_FILES. */
+    static void _IncrementAppCtxClipIdx(ApplicationContext& ctx)
+    {
+        const auto nextAudioIdx = ctx.Get<uint32_t>("clipIndex") + 1;
+        const bool wrapAround = nextAudioIdx >= NUMBER_OF_FILES;
+
+        ctx.Set<uint32_t>("clipIndex", wrapAround ? 0 : nextAudioIdx);
+    }
+
+    /* Stores idx as the "clipIndex" context entry when it is less than
+     * NUMBER_OF_FILES; otherwise reports the error and leaves the context
+     * untouched. Returns whether the index was stored. */
+    static bool _SetAppCtxClipIdx(ApplicationContext& ctx, const uint32_t idx)
+    {
+        const bool idxInRange = idx < NUMBER_OF_FILES;
+
+        if (idxInRange) {
+            ctx.Set<uint32_t>("clipIndex", idx);
+        } else {
+            printf_err("Invalid idx %u (expected less than %u)\n",
+                       idx, NUMBER_OF_FILES);
+        }
+        return idxInRange;
+    }
+
+    /* Writes one text row per result to the data presentation object (time
+     * stamp, top keyword and its score as a percentage) and logs every
+     * thresholded label of each result. Always returns true. */
+    static bool _PresentInferenceResult(hal_platform& platform,
+                                        const std::vector<arm::app::kws::KwsResult>& results)
+    {
+        constexpr uint32_t dataPsnTxtStartX1 = 20;
+        constexpr uint32_t dataPsnTxtStartY1 = 30;
+        constexpr uint32_t dataPsnTxtYIncr   = 16;  /* Row index increment. */
+
+        platform.data_psn->set_text_color(COLOR_GREEN);
+
+        /* First result row sits two increments below the start position. */
+        uint32_t rowIdx1 = dataPsnTxtStartY1 + 2 * dataPsnTxtYIncr;
+
+        for (const auto& result : results) {
+
+            /* Defaults used when no label passed the threshold. */
+            std::string topKeyword{"<none>"};
+            float score = 0.f;
+
+            if (!result.m_resultVec.empty()) {
+                const auto& best = result.m_resultVec.front();
+                topKeyword = best.m_label;
+                score = best.m_normalisedVal;
+            }
+
+            std::string resultStr{"@"};
+            resultStr += std::to_string(result.m_timeStamp);
+            resultStr += "s: ";
+            resultStr += topKeyword;
+            resultStr += " (";
+            resultStr += std::to_string(static_cast<int>(score * 100));
+            resultStr += "%)";
+
+            platform.data_psn->present_data_text(
+                                resultStr.c_str(), resultStr.size(),
+                                dataPsnTxtStartX1, rowIdx1, false);
+            rowIdx1 += dataPsnTxtYIncr;
+
+            info("For timestamp: %f (inference #: %u); threshold: %f\n",
+                 result.m_timeStamp, result.m_inferenceNumber,
+                 result.m_threshold);
+
+            uint32_t labelIdx = 0;
+            for (const auto& entry : result.m_resultVec) {
+                info("\t\tlabel @ %u: %s, score: %f\n", labelIdx,
+                     entry.m_label.c_str(),
+                     entry.m_normalisedVal);
+                ++labelIdx;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * @brief Generic feature calculator factory.
+     *
+     * Returns lambda function to compute features using features cache.
+     * Real features math is done by a lambda function provided as a parameter.
+     * Features are written to input tensor memory.
+     *
+     * The feature cache has static storage duration: one cache exists per
+     * feature type T and is shared by every calculator created for that type.
+     *
+     * @tparam T            Feature vector type.
+     * @param inputTensor   Model input tensor pointer.
+     * @param cacheSize     Number of feature vectors to cache. Defined by the sliding window overlap.
+     * @param compute       Features calculator function.
+     * @return              Lambda function to compute features.
+     */
+    template<class T>
+    std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
+    _FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+                 std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+    {
+        /* Feature cache used by the lambda function. Being static, it is not
+         * captured; the lambda refers to it directly. */
+        static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
+
+        /* The static initialiser only runs on the first call, so honour a
+         * different cache size requested by a later call. */
+        if (featureCache.size() != cacheSize) {
+            featureCache.resize(cacheSize);
+        }
+
+        return [=](std::vector<int16_t>& audioDataWindow,
+                   size_t index,
+                   bool useCache,
+                   size_t featuresOverlapIndex)
+        {
+            T *tensorData = tflite::GetTensorData<T>(inputTensor);
+            std::vector<T> features;
+
+            /* Reuse features from cache if cache is ready and sliding windows overlap.
+             * Overlap is in the beginning of sliding window with a size of a feature cache. */
+            if (useCache && index < featureCache.size()) {
+                features = std::move(featureCache[index]);
+            } else {
+                features = compute(audioDataWindow);
+            }
+            auto size = features.size();
+            auto sizeBytes = sizeof(T) * size;
+            std::memcpy(tensorData + (index * size), features.data(), sizeBytes);
+
+            /* Start renewing cache as soon iteration goes out of the windows overlap.
+             * The slot is range checked so a cache smaller than the overlap is never
+             * written out of bounds. */
+            if (index >= featuresOverlapIndex) {
+                const size_t cacheSlot = index - featuresOverlapIndex;
+                if (cacheSlot < featureCache.size()) {
+                    featureCache[cacheSlot] = std::move(features);
+                }
+            }
+        };
+    }
+
+ /* Explicit instantiations of _FeatureCalc for the feature types used by
+ * GetFeatureCalculator: int8_t, uint8_t, int16_t and float. */
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+ _FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int8_t> (std::vector<int16_t>& )> compute);
+
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+ _FeatureCalc<uint8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<uint8_t> (std::vector<int16_t>& )> compute);
+
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+ _FeatureCalc<int16_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int16_t> (std::vector<int16_t>& )> compute);
+
+ template std::function<void(std::vector<int16_t>&, size_t, bool, size_t)>
+ _FeatureCalc<float>(TfLiteTensor *inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<float>(std::vector<int16_t>&)> compute);
+
+
+    /* Builds the feature calculator matching the input tensor:
+     *  - affine-quantised tensors get a calculator producing int8_t, uint8_t
+     *    or int16_t features quantised with the tensor's first scale and
+     *    zero point;
+     *  - any other tensor gets a float calculator.
+     * An unsupported quantised tensor type is reported and an empty function
+     * is returned, which callers must check for. The mfcc object is captured
+     * by reference and must outlive the returned function. */
+    static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+    GetFeatureCalculator(audio::DsCnnMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
+    {
+        std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
+
+        TfLiteQuantization quant = inputTensor->quantization;
+
+        if (kTfLiteAffineQuantization == quant.type) {
+
+            auto *quantParams = static_cast<TfLiteAffineQuantization *>(quant.params);
+            const float quantScale = quantParams->scale->data[0];
+            const int quantOffset = quantParams->zero_point->data[0];
+
+            switch (inputTensor->type) {
+                case kTfLiteInt8: {
+                    mfccFeatureCalc = _FeatureCalc<int8_t>(inputTensor,
+                                                           cacheSize,
+                                                           [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                               return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
+                                                                                                    quantScale,
+                                                                                                    quantOffset);
+                                                           }
+                    );
+                    break;
+                }
+                case kTfLiteUInt8: {
+                    mfccFeatureCalc = _FeatureCalc<uint8_t>(inputTensor,
+                                                            cacheSize,
+                                                            [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                                return mfcc.MfccComputeQuant<uint8_t>(audioDataWindow,
+                                                                                                      quantScale,
+                                                                                                      quantOffset);
+                                                            }
+                    );
+                    break;
+                }
+                case kTfLiteInt16: {
+                    mfccFeatureCalc = _FeatureCalc<int16_t>(inputTensor,
+                                                            cacheSize,
+                                                            [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                                return mfcc.MfccComputeQuant<int16_t>(audioDataWindow,
+                                                                                                      quantScale,
+                                                                                                      quantOffset);
+                                                            }
+                    );
+                    break;
+                }
+                default:
+                    /* mfccFeatureCalc stays empty to signal the failure. */
+                    printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+                    break;
+            }
+
+        } else {
+            mfccFeatureCalc = _FeatureCalc<float>(inputTensor,
+                                                  cacheSize,
+                                                  [&mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                      return mfcc.MfccCompute(audioDataWindow);
+                                                  });
+        }
+        return mfccFeatureCalc;
+    }
+
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws/usecase.cmake b/source/use_case/kws/usecase.cmake
new file mode 100644
index 0000000..b5ac09e
--- /dev/null
+++ b/source/use_case/kws/usecase.cmake
@@ -0,0 +1,159 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+
+# If the path to a directory or source file has been defined,
+# get the type here (FILEPATH or PATH):
+if (DEFINED ${use_case}_FILE_PATH)
+    get_path_type(${${use_case}_FILE_PATH} PATH_TYPE)
+
+    # Stop the configuration if the path is neither a dir nor a file path.
+    # The operands are quoted so that an empty PATH_TYPE reaches this error
+    # message instead of breaking the if() expression itself.
+    if (NOT "${PATH_TYPE}" STREQUAL "PATH" AND NOT "${PATH_TYPE}" STREQUAL "FILEPATH")
+        message(FATAL_ERROR "Invalid ${use_case}_FILE_PATH. It should be a dir or file path.")
+    endif()
+else()
+    # Default is a directory path
+    set(PATH_TYPE PATH)
+endif()
+
+# User-configurable options for this use case. Each USER_OPTION call below
+# passes, in order: option name, description, default value and type.
+message(STATUS "${use_case}_FILE_PATH is of type: ${PATH_TYPE}")
+USER_OPTION(${use_case}_FILE_PATH "Directory with custom WAV input files, or path to a single WAV file, to use in the evaluation application."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
+ ${PATH_TYPE})
+
+USER_OPTION(${use_case}_LABELS_TXT_FILE "Labels' txt file for the chosen model."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/ds_cnn_labels.txt
+ FILEPATH)
+
+USER_OPTION(${use_case}_AUDIO_RATE "Specify the target sampling rate. Default is 16000."
+ 16000
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MONO "Specify if the audio needs to be converted to mono. Default is ON."
+ ON
+ BOOL)
+
+USER_OPTION(${use_case}_AUDIO_OFFSET "Specify the offset to start reading after this time (in seconds). Default is 0."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_DURATION "Specify the audio duration to load (in seconds). If set to 0 the entire audio will be processed."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_RES_TYPE "Specify re-sampling algorithm to use. By default is 'kaiser_best'."
+ kaiser_best
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MIN_SAMPLES "Specify the minimum number of samples to use. By default is 16000, if the audio is shorter will be automatically padded."
+ 16000
+ STRING)
+
+USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD "Specify the score threshold [0.0, 1.0) that must be applied to the inference results for a label to be deemed valid."
+ 0.9
+ STRING)
+
+# Generate input files
+# The audio options above are forwarded positionally after the source path
+# and the two generated-code directories.
+generate_audio_code(${${use_case}_FILE_PATH} ${SRC_GEN_DIR} ${INC_GEN_DIR}
+ ${${use_case}_AUDIO_RATE}
+ ${${use_case}_AUDIO_MONO}
+ ${${use_case}_AUDIO_OFFSET}
+ ${${use_case}_AUDIO_DURATION}
+ ${${use_case}_AUDIO_RES_TYPE}
+ ${${use_case}_AUDIO_MIN_SAMPLES})
+
+# Generate labels file
+set(${use_case}_LABELS_CPP_FILE Labels)
+generate_labels_code(
+ INPUT "${${use_case}_LABELS_TXT_FILE}"
+ DESTINATION_SRC ${SRC_GEN_DIR}
+ DESTINATION_HDR ${INC_GEN_DIR}
+ OUTPUT_FILENAME "${${use_case}_LABELS_CPP_FILE}"
+)
+
+USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
+ 0x00100000
+ STRING)
+
+# If there is no tflite file pointed to, download the default model and use
+# it; otherwise no default model path is available ("N/A").
+if (NOT DEFINED ${use_case}_MODEL_TFLITE_PATH)
+
+    set(MODEL_FILENAME          ds_cnn_clustered_int8.tflite)
+    set(MODEL_RESOURCES_DIR     ${DOWNLOAD_DEP_DIR}/${use_case})
+    file(MAKE_DIRECTORY         ${MODEL_RESOURCES_DIR})
+    set(DEFAULT_MODEL_PATH      ${MODEL_RESOURCES_DIR}/${MODEL_FILENAME})
+
+    # Download the default model
+    set(ZOO_COMMON_SUBPATH      "models/keyword_spotting/ds_cnn_large/tflite_clustered_int8")
+    set(ZOO_MODEL_SUBPATH       "${ZOO_COMMON_SUBPATH}/${MODEL_FILENAME}")
+
+    download_file_from_modelzoo(${ZOO_MODEL_SUBPATH} ${DEFAULT_MODEL_PATH})
+
+    if (ETHOS_U55_ENABLED)
+        message(STATUS
+            "Ethos-U55 is enabled, but the model downloaded is not optimized by vela. "
+            "To use Ethos-U55 acceleration, optimise the downloaded model and pass it "
+            "as ${use_case}_MODEL_TFLITE_PATH to the CMake configuration.")
+    endif()
+
+    # If the target platform is native
+    # (operands quoted so an unset TARGET_PLATFORM compares as an empty
+    # string instead of producing a malformed if() expression)
+    if ("${TARGET_PLATFORM}" STREQUAL "native")
+
+        # Download test vectors
+        set(ZOO_TEST_IFM_SUBPATH    "${ZOO_COMMON_SUBPATH}/testing_input/input_2/0.npy")
+        set(ZOO_TEST_OFM_SUBPATH    "${ZOO_COMMON_SUBPATH}/testing_output/Identity/0.npy")
+
+        set(${use_case}_TEST_IFM    ${MODEL_RESOURCES_DIR}/ifm0.npy CACHE FILEPATH
+                                    "Input test vector for ${use_case}")
+        set(${use_case}_TEST_OFM    ${MODEL_RESOURCES_DIR}/ofm0.npy CACHE FILEPATH
+                                    "Output test vector for ${use_case}")
+
+        download_file_from_modelzoo(${ZOO_TEST_IFM_SUBPATH} ${${use_case}_TEST_IFM})
+        download_file_from_modelzoo(${ZOO_TEST_OFM_SUBPATH} ${${use_case}_TEST_OFM})
+
+        set(TEST_SRC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/src)
+        set(TEST_INC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/include)
+        file(MAKE_DIRECTORY ${TEST_SRC_GEN_DIR} ${TEST_INC_GEN_DIR})
+
+        # Generate test data files to be included in x86 tests
+        generate_test_data_code(
+                            INPUT_DIR "${DOWNLOAD_DEP_DIR}/${use_case}"
+                            DESTINATION_SRC ${TEST_SRC_GEN_DIR}
+                            DESTINATION_HDR ${TEST_INC_GEN_DIR}
+                            USECASE  "${use_case}")
+    endif()
+
+else()
+    set(DEFAULT_MODEL_PATH  "N/A")
+endif()
+
+# Extra expressions handed to generate_tflite_code() through EXPRESSIONS.
+# They define the g_FrameLength, g_FrameStride and g_ScoreThreshold constants;
+# the entries carry no trailing ';' (presumably appended by the generator -
+# TODO confirm).
+set(EXTRA_MODEL_CODE
+ "/* Model parameters for ${use_case} */"
+ "extern const int g_FrameLength = 640"
+ "extern const int g_FrameStride = 320"
+ "extern const float g_ScoreThreshold = ${${use_case}_MODEL_SCORE_THRESHOLD}"
+ )
+
+# The default is the downloaded model path, or "N/A" when the user already
+# defined ${use_case}_MODEL_TFLITE_PATH.
+USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the evaluation application. Model files must be in tflite format."
+ ${DEFAULT_MODEL_PATH}
+ FILEPATH)
+
+# Generate model file
+generate_tflite_code(
+ MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
+ DESTINATION ${SRC_GEN_DIR}
+ EXPRESSIONS ${EXTRA_MODEL_CODE}
+)
diff --git a/source/use_case/kws_asr/include/AsrClassifier.hpp b/source/use_case/kws_asr/include/AsrClassifier.hpp
new file mode 100644
index 0000000..de18aa8
--- /dev/null
+++ b/source/use_case/kws_asr/include/AsrClassifier.hpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_CLASSIFIER_HPP
+#define ASR_CLASSIFIER_HPP
+
+#include "Classifier.hpp"
+
+namespace arm {
+namespace app {
+
+ /* Classifier specialisation for the ASR model: overrides
+ * GetClassificationResults and adds a private, type-templated helper
+ * that works on the raw output tensor. */
+ class AsrClassifier : public Classifier {
+ public:
+ /**
+ * @brief Gets the top N classification results from the
+ * output vector.
+ * @param[in] outputTensor Inference output tensor from an NN model.
+ * @param[out] vecResults A vector of classification results
+ * populated by this function.
+ * @param[in] labels Labels vector to match classified classes
+ * @param[in] topNCount Number of top classifications to pick.
+ * @return true if successful, false otherwise.
+ **/
+ bool GetClassificationResults(
+ TfLiteTensor* outputTensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, uint32_t topNCount) override;
+
+ private:
+
+ /**
+ * @brief Utility function that gets the top 1 classification results from the
+ * output tensor (vector of vector).
+ * @tparam T Element type used to read the tensor data
+ * (presumably matches the tensor's data type - TODO confirm).
+ * @param[in] tensor Inference output tensor from an NN model.
+ * @param[out] vecResults A vector of classification results
+ * populated by this function.
+ * @param[in] labels Labels vector to match classified classes.
+ * @param[in] scale Quantization scale.
+ * @param[in] zeroPoint Quantization zero point.
+ * @return true if successful, false otherwise.
+ **/
+ template<typename T>
+ bool _GetTopResults(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, double scale, double zeroPoint);
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_CLASSIFIER_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/AsrResult.hpp b/source/use_case/kws_asr/include/AsrResult.hpp
new file mode 100644
index 0000000..25fa9e8
--- /dev/null
+++ b/source/use_case/kws_asr/include/AsrResult.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_RESULT_HPP
+#define ASR_RESULT_HPP
+
+#include "ClassificationResult.hpp"
+
+#include <vector>
+
+namespace arm {
+namespace app {
+namespace asr {
+
+ using ResultVec = std::vector<arm::app::ClassificationResult>;
+
+    /* Holds the outcome of one ASR inference: the classification results
+     * that reached the score threshold, plus the time stamp, inference
+     * number and threshold they were produced with. */
+    class AsrResult {
+
+    public:
+        ResultVec       m_resultVec;        /* Container for "thresholded" classification results. */
+        float           m_timeStamp;        /* Audio timestamp for this result. */
+        uint32_t        m_inferenceNumber;  /* Corresponding inference number. */
+        float           m_threshold;        /* Threshold value for `m_resultVec` */
+
+        AsrResult() = delete;
+
+        /* Keeps only the entries of resultVec whose normalised value is
+         * greater than or equal to scoreThreshold. */
+        AsrResult(ResultVec&        resultVec,
+                  const float       timestamp,
+                  const uint32_t    inferenceIdx,
+                  const float       scoreThreshold) :
+            m_resultVec(),
+            m_timeStamp(timestamp),
+            m_inferenceNumber(inferenceIdx),
+            m_threshold(scoreThreshold)
+        {
+            for (auto& candidate : resultVec) {
+                if (candidate.m_normalisedVal >= m_threshold) {
+                    m_resultVec.emplace_back(candidate);
+                }
+            }
+        }
+
+        ~AsrResult() = default;
+    };
+
+} /* namespace asr */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/DsCnnMfcc.hpp b/source/use_case/kws_asr/include/DsCnnMfcc.hpp
new file mode 100644
index 0000000..c97dd9d
--- /dev/null
+++ b/source/use_case/kws_asr/include/DsCnnMfcc.hpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_DSCNN_MFCC_HPP
+#define KWS_ASR_DSCNN_MFCC_HPP
+
+#include "Mfcc.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* Class to provide DS-CNN specific MFCC calculation requirements.
+ * It only fixes the MfccParams defaults below; all computation is
+ * inherited from MFCC. */
+ class DsCnnMFCC : public MFCC {
+
+ public:
+ /* Defaults forwarded to MfccParams by the constructor
+ * (frequencies presumably in Hz - TODO confirm). */
+ static constexpr uint32_t ms_defaultSamplingFreq = 16000;
+ static constexpr uint32_t ms_defaultNumFbankBins = 40;
+ static constexpr uint32_t ms_defaultMelLoFreq = 20;
+ static constexpr uint32_t ms_defaultMelHiFreq = 4000;
+ static constexpr bool ms_defaultUseHtkMethod = true;
+
+
+ /* numFeats and frameLen are passed to MfccParams together with the
+ * defaults above. */
+ explicit DsCnnMFCC(const size_t numFeats, const size_t frameLen)
+ : MFCC(MfccParams(
+ ms_defaultSamplingFreq, ms_defaultNumFbankBins,
+ ms_defaultMelLoFreq, ms_defaultMelHiFreq,
+ numFeats, frameLen, ms_defaultUseHtkMethod))
+ {}
+ DsCnnMFCC() = delete;
+ ~DsCnnMFCC() = default;
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_DSCNN_MFCC_HPP */
diff --git a/source/use_case/kws_asr/include/DsCnnModel.hpp b/source/use_case/kws_asr/include/DsCnnModel.hpp
new file mode 100644
index 0000000..150a48c
--- /dev/null
+++ b/source/use_case/kws_asr/include/DsCnnModel.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_DSCNNMODEL_HPP
+#define KWS_ASR_DSCNNMODEL_HPP
+
+#include "Model.hpp"
+
+/* KWS model parameters. These are declarations only; the definitions are
+ * expected to come from another translation unit (presumably the generated
+ * model source - TODO confirm). */
+namespace arm {
+namespace app {
+namespace kws {
+ extern const int g_FrameLength;
+ extern const int g_FrameStride;
+ extern const float g_ScoreThreshold;
+ extern const uint32_t g_NumMfcc;
+ extern const uint32_t g_NumAudioWins;
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
+
+namespace arm {
+namespace app {
+
+ /* Model wrapper for DS-CNN: supplies the op resolver, the enlisted
+ * operations and the model data pointer/size through the Model
+ * interface overrides. */
+ class DsCnnModel : public Model {
+ public:
+ /* Indices for the expected model - based on input and output tensor shapes */
+ static constexpr uint32_t ms_inputRowsIdx = 2;
+ static constexpr uint32_t ms_inputColsIdx = 3;
+ static constexpr uint32_t ms_outputRowsIdx = 2;
+ static constexpr uint32_t ms_outputColsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ /** @brief Gets the pointer to the model data. */
+ const uint8_t* ModelPointer() override;
+
+ /** @brief Gets the model size (presumably in bytes - TODO confirm). */
+ size_t ModelSize() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int _ms_maxOpCnt = 10;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<_ms_maxOpCnt> _m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_DSCNNMODEL_HPP */
diff --git a/source/use_case/kws_asr/include/KwsResult.hpp b/source/use_case/kws_asr/include/KwsResult.hpp
new file mode 100644
index 0000000..45bb790
--- /dev/null
+++ b/source/use_case/kws_asr/include/KwsResult.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_RESULT_HPP
+#define KWS_RESULT_HPP
+
+#include "ClassificationResult.hpp"
+
+#include <vector>
+
+namespace arm {
+namespace app {
+namespace kws {
+
+ using ResultVec = std::vector < arm::app::ClassificationResult >;
+
+ /* Structure for holding kws result. */
+ class KwsResult {
+
+ public:
+ ResultVec m_resultVec; /* Container for "thresholded" classification results. */
+ float m_timeStamp; /* Audio timestamp for this result. */
+ uint32_t m_inferenceNumber; /* Corresponding inference number. */
+ float m_threshold; /* Threshold value for `m_resultVec.` */
+
+ KwsResult() = delete;
+ KwsResult(ResultVec& resultVec,
+ const float timestamp,
+ const uint32_t inferenceIdx,
+ const float scoreThreshold) {
+
+ this->m_threshold = scoreThreshold;
+ this->m_timeStamp = timestamp;
+ this->m_inferenceNumber = inferenceIdx;
+
+ this->m_resultVec = ResultVec();
+ for (auto & i : resultVec) {
+ if (i.m_normalisedVal >= this->m_threshold) {
+ this->m_resultVec.emplace_back(i);
+ }
+ }
+ }
+ ~KwsResult() = default;
+ };
+
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/OutputDecode.hpp b/source/use_case/kws_asr/include/OutputDecode.hpp
new file mode 100644
index 0000000..2bbb29c
--- /dev/null
+++ b/source/use_case/kws_asr/include/OutputDecode.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_OUTPUT_DECODE_HPP
+#define KWS_ASR_OUTPUT_DECODE_HPP
+
+#include "AsrClassifier.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+    /**
+     * @brief       Decodes the given classification results into a single
+     *              string.
+     * @param[in]   vecResults   Vector of classification results to decode.
+     * @return      Decoded output as a std::string.
+     **/
+    std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults);
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_OUTPUT_DECODE_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/UseCaseHandler.hpp b/source/use_case/kws_asr/include/UseCaseHandler.hpp
new file mode 100644
index 0000000..1c60662
--- /dev/null
+++ b/source/use_case/kws_asr/include/UseCaseHandler.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_EVT_HANDLER_HPP
+#define KWS_ASR_EVT_HANDLER_HPP
+
+#include "AppContext.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Handles the inference event.
+ * @param[in] ctx Pointer to the application context.
+ * @param[in] clipIndex Index to the audio clip to classify.
+ * @param[in] runAll Flag to request classification of all the available audio clips.
+ * @return true or false based on execution success.
+ **/
+ bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_EVT_HANDLER_HPP */
diff --git a/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp b/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp
new file mode 100644
index 0000000..0852cbf
--- /dev/null
+++ b/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_WAV2LET_MFCC_HPP
+#define KWS_ASR_WAV2LET_MFCC_HPP
+
+#include "Mfcc.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* Class to provide Wav2Letter specific MFCC calculation requirements. */
+ class Wav2LetterMFCC : public MFCC {
+
+ public:
+ static constexpr uint32_t ms_defaultSamplingFreq = 16000;
+ static constexpr uint32_t ms_defaultNumFbankBins = 128;
+ static constexpr uint32_t ms_defaultMelLoFreq = 0;
+ static constexpr uint32_t ms_defaultMelHiFreq = 8000;
+ static constexpr bool ms_defaultUseHtkMethod = false;
+
+ explicit Wav2LetterMFCC(const size_t numFeats, const size_t frameLen)
+ : MFCC(MfccParams(
+ ms_defaultSamplingFreq, ms_defaultNumFbankBins,
+ ms_defaultMelLoFreq, ms_defaultMelHiFreq,
+ numFeats, frameLen, ms_defaultUseHtkMethod))
+ {}
+
+ Wav2LetterMFCC() = delete;
+ ~Wav2LetterMFCC() = default;
+
+ protected:
+
+ /**
+ * @brief Overrides base class implementation of this function.
+ * @param[in] fftVec Vector populated with FFT magnitudes.
+ * @param[in] melFilterBank 2D Vector with filter bank weights.
+ * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
+ * to be used for each bin.
+ * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
+ * to be used for each bin.
+ * @param[out] melEnergies Pre-allocated vector of MEL energies to be
+ * populated.
+ * @return true if successful, false otherwise.
+ */
+ bool ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<int32_t>& filterBankFilterFirst,
+ std::vector<int32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies) override;
+
+ /**
+ * @brief Override for the base class implementation convert mel
+ * energies to logarithmic scale. The difference from
+ * default behaviour is that the power is converted to dB
+ * and subsequently clamped.
+ * @param[in,out] melEnergies 1D vector of Mel energies.
+ **/
+ void ConvertToLogarithmicScale(
+ std::vector<float>& melEnergies) override;
+
+ /**
+ * @brief Create a matrix used to calculate Discrete Cosine
+ * Transform. Override for the base class' default
+ * implementation as the first and last elements
+ * use a different normaliser.
+ * @param[in] inputLength Input length of the buffer on which
+ * DCT will be performed.
+ * @param[in] coefficientCount Total coefficients per input length.
+ * @return 1D vector with inputLength x coefficientCount elements
+ * populated with DCT coefficients.
+ */
+ std::vector<float> CreateDCTMatrix(
+ int32_t inputLength,
+ int32_t coefficientCount) override;
+
+ /**
+ * @brief Given the low and high Mel values, get the normaliser
+ * for weights to be applied when populating the filter
+ * bank. Override for the base class implementation.
+ * @param[in] leftMel Low Mel frequency value.
+ * @param[in] rightMel High Mel frequency value.
+ * @param[in] useHTKMethod Bool to signal if HTK method is to be
+ * used for calculation.
+ * @return Value to use for normalising.
+ */
+ float GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ bool useHTKMethod) override;
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_WAV2LET_MFCC_HPP */
diff --git a/source/use_case/kws_asr/include/Wav2LetterModel.hpp b/source/use_case/kws_asr/include/Wav2LetterModel.hpp
new file mode 100644
index 0000000..fb701ea
--- /dev/null
+++ b/source/use_case/kws_asr/include/Wav2LetterModel.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_WAV2LETTER_MODEL_HPP
+#define KWS_ASR_WAV2LETTER_MODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+namespace asr {
+ extern const int g_FrameLength;
+ extern const int g_FrameStride;
+ extern const float g_ScoreThreshold;
+ extern const int g_ctxLen;
+} /* namespace asr */
+} /* namespace app */
+} /* namespace arm */
+
+namespace arm {
+namespace app {
+
+    class Wav2LetterModel : public Model {
+    public:
+        /* Indices for the expected model - based on input and output tensor shapes */
+        static constexpr uint32_t ms_inputRowsIdx = 1;
+        static constexpr uint32_t ms_inputColsIdx = 2;
+        static constexpr uint32_t ms_outputRowsIdx = 2;
+        static constexpr uint32_t ms_outputColsIdx = 3;
+
+    protected:
+        /** @brief   Gets the reference to op resolver interface class. */
+        const tflite::MicroOpResolver& GetOpResolver() override;
+
+        /** @brief   Adds operations to the op resolver instance. */
+        bool EnlistOperations() override;
+
+        /** @brief   Gets the pointer to the model data. */
+        const uint8_t* ModelPointer() override;
+
+        /** @brief   Gets the size of the model data (presumably in bytes - confirm). */
+        size_t ModelSize() override;
+
+    private:
+        /* Maximum number of individual operations that can be enlisted. */
+        static constexpr int _ms_maxOpCnt = 5;
+        /* A mutable op resolver instance. */
+        tflite::MicroMutableOpResolver<_ms_maxOpCnt> _m_opResolver;
+    };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_WAV2LETTER_MODEL_HPP */
diff --git a/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp b/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp
new file mode 100644
index 0000000..3a9d401
--- /dev/null
+++ b/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_WAV2LET_POSTPROC_HPP
+#define KWS_ASR_WAV2LET_POSTPROC_HPP
+
+#include "TensorFlowLiteMicro.hpp" /* TensorFlow headers */
+#include "hal.h" /* stdout facility */
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+ * @brief Helper class to manage tensor post-processing for "wav2letter"
+ * output.
+ */
+    class Postprocess {
+    public:
+        /**
+         * @brief       Constructor
+         * @param[in]   contextLen      Left and right context length for output tensor.
+         * @param[in]   innerLen        Length of the section between left and right
+         *                              context.
+         * @param[in]   blankTokenIdx   Index of the labels blank token.
+         **/
+        Postprocess(uint32_t contextLen,
+                    uint32_t innerLen,
+                    uint32_t blankTokenIdx);
+
+        Postprocess() = delete;
+        ~Postprocess() = default;
+
+        /**
+         * @brief       Erases the required part of the tensor based
+         *              on context lengths set up during initialisation.
+         * @param[in,out]   tensor          Pointer to the tensor.
+         * @param[in]       axisIdx         Index of the axis on which erase is
+         *                                  performed.
+         * @param[in]       lastIteration   Flag to signal if this is the
+         *                                  last iteration in which case
+         *                                  the right context is preserved.
+         * @return      true if successful, false otherwise.
+         */
+        bool Invoke(TfLiteTensor*  tensor,
+                    uint32_t       axisIdx,
+                    bool           lastIteration = false);
+
+    private:
+        uint32_t    _m_contextLen;      /* Lengths of left and right contexts. */
+        uint32_t    _m_innerLen;        /* Length of inner context. */
+        uint32_t    _m_totalLen;        /* Total length of the required axis. */
+        uint32_t    _m_countIterations; /* Current number of iterations. */
+        uint32_t    _m_blankTokenIdx;   /* Index of the labels blank token. */
+        /**
+         * @brief       Checks if the tensor and axis index are valid
+         *              inputs to the object - based on how it has been
+         *              initialised.
+         * @return      true if valid, false otherwise.
+         */
+        bool _IsInputValid(TfLiteTensor*  tensor,
+                           uint32_t       axisIdx) const;
+
+        /**
+         * @brief       Gets the tensor data element size in bytes based on the tensor type.
+         * @param[in]   tensor   Pointer to the tensor.
+         * @return      Size in bytes, 0 if not supported.
+         */
+        uint32_t _GetTensorElementSize(TfLiteTensor* tensor);
+
+        /**
+         * @brief       Erases sections from the data assuming row-wise
+         *              arrangement along the context axis.
+         * @return      true if successful, false otherwise.
+         */
+        bool _EraseSectionsRowWise(uint8_t* ptrData,
+                                   uint32_t strideSzBytes,
+                                   bool lastIteration);
+
+    };
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_WAV2LET_POSTPROC_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp b/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
new file mode 100644
index 0000000..3ffabb4
--- /dev/null
+++ b/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_ASR_WAV2LET_PREPROC_HPP
+#define KWS_ASR_WAV2LET_PREPROC_HPP
+
+#include "Wav2LetterModel.hpp"
+#include "Wav2LetterMfcc.hpp"
+#include "AudioUtils.hpp"
+#include "DataStructures.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /* Class to facilitate pre-processing calculation for Wav2Letter model
+ * for ASR. */
+ using AudioWindow = SlidingWindow <const int16_t>;
+
+ class Preprocess {
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] numMfccFeatures Number of MFCC features per window.
+ * @param[in] windowLen Number of elements in a window.
+ * @param[in] windowStride Stride (in number of elements) for
+ * moving the window.
+ * @param[in] numMfccVectors Number of MFCC vectors per window.
+ */
+ Preprocess(
+ uint32_t numMfccFeatures,
+ uint32_t windowLen,
+ uint32_t windowStride,
+ uint32_t numMfccVectors);
+ Preprocess() = delete;
+ ~Preprocess() = default;
+
+ /**
+ * @brief Calculates the features required from audio data. This
+ * includes MFCC, first and second order deltas,
+ * normalisation and finally, quantisation. The tensor is
+ * populated with feature from a given window placed along
+ * in a single row.
+ * @param[in] audioData Pointer to the first element of audio data.
+ * @param[in] audioDataLen Number of elements in the audio data.
+ * @param[in] tensor Tensor to be populated.
+ * @return true if successful, false in case of error.
+ */
+ bool Invoke(const int16_t * audioData,
+ uint32_t audioDataLen,
+ TfLiteTensor * tensor);
+
+ protected:
+ /**
+ * @brief Computes the first and second order deltas for the
+ * MFCC buffers - they are assumed to be populated.
+ *
+ * @param[in] mfcc MFCC buffers.
+ * @param[out] delta1 Result of the first diff computation.
+ * @param[out] delta2 Result of the second diff computation.
+ *
+ * @return true if successful, false otherwise.
+ */
+ static bool _ComputeDeltas(Array2d<float>& mfcc,
+ Array2d<float>& delta1,
+ Array2d<float>& delta2);
+
+ /**
+ * @brief Given a 2D vector of floats, computes the mean.
+ * @param[in] vec Vector of vector of floats.
+ * @return Mean value.
+ */
+ static float _GetMean(Array2d<float>& vec);
+
+ /**
+ * @brief Given a 2D vector of floats, computes the stddev.
+ * @param[in] vec Vector of vector of floats.
+ * @param[in] mean Mean value of the vector passed in.
+ * @return stddev value.
+ */
+ static float _GetStdDev(Array2d<float>& vec,
+ float mean);
+
+ /**
+ * @brief Given a 2D vector of floats, normalises it using
+ * the mean and the stddev
+ * @param[in,out] vec Vector of vector of floats.
+ */
+ static void _NormaliseVec(Array2d<float>& vec);
+
+ /**
+ * @brief Normalises the MFCC and delta buffers.
+ */
+ void _Normalise();
+
+ /**
+ * @brief Given the quantisation and data type limits, computes
+ * the quantised values of a floating point input data.
+ * @param[in] elem Element to be quantised.
+ * @param[in] quantScale Scale.
+ * @param[in] quantOffset Offset.
+ * @param[in] minVal Numerical limit - minimum.
+ * @param[in] maxVal Numerical limit - maximum.
+ * @return Floating point quantised value.
+ */
+ static float _GetQuantElem(
+ float elem,
+ float quantScale,
+ int quantOffset,
+ float minVal,
+ float maxVal);
+
+ /**
+ * @brief Quantises the MFCC and delta buffers, and places them
+ * in the output buffer. While doing so, it transposes
+ * the data. Reason: Buffers in this class are arranged
+ * for "time" axis to be row major. Primary reason for
+ * this being the convolution speed up (as we can use
+ * contiguous memory). The output, however, requires the
+ * time axis to be in column major arrangement.
+ * @param[in] outputBuf Pointer to the output buffer.
+ * @param[in] outputBufSz Output buffer's size.
+ * @param[in] quantScale Quantisation scale.
+ * @param[in] quantOffset Quantisation offset.
+ */
+        template <typename T>
+        bool _Quantise(
+                T *             outputBuf,
+                const uint32_t  outputBufSz,
+                const float     quantScale,
+                const int       quantOffset)
+        {
+            /* Each feature of each feature vector is written three times (MFCC,
+             * delta 1 and delta 2): the buffer must hold all of them. */
+            const auto requiredBytes = sizeof(T) * 3 *
+                    this->_m_numMfccFeats * this->_m_numFeatVectors;
+            if (outputBufSz < requiredBytes) {
+                printf_err("Tensor size too small for features\n");
+                return false;
+            }
+
+            /* Populate. */
+            T * outputBufMfcc = outputBuf;
+            T * outputBufD1 = outputBuf + this->_m_numMfccFeats;
+            T * outputBufD2 = outputBufD1 + this->_m_numMfccFeats;
+            const uint32_t ptrIncr = this->_m_numMfccFeats * 2;  /* (3 vectors - 1 vector) */
+
+            /* lowest() equals min() for integer T and is also right for floating T. */
+            const float minVal = std::numeric_limits<T>::lowest();
+            const float maxVal = std::numeric_limits<T>::max();
+
+            /* We need to do a transpose while copying and concatenating
+             * the tensor. */
+            for (uint32_t j = 0; j < this->_m_numFeatVectors; ++j) {
+                for (uint32_t i = 0; i < this->_m_numMfccFeats; ++i) {
+                    *outputBufMfcc++ = static_cast<T>(this->_GetQuantElem(
+                            this->_m_mfccBuf(i, j), quantScale, quantOffset, minVal, maxVal));
+                    *outputBufD1++ = static_cast<T>(this->_GetQuantElem(
+                            this->_m_delta1Buf(i, j), quantScale, quantOffset, minVal, maxVal));
+                    *outputBufD2++ = static_cast<T>(this->_GetQuantElem(
+                            this->_m_delta2Buf(i, j), quantScale, quantOffset, minVal, maxVal));
+                }
+                outputBufMfcc += ptrIncr;
+                outputBufD1 += ptrIncr;
+                outputBufD2 += ptrIncr;
+            }
+            return true;
+        }
+
+ private:
+ Wav2LetterMFCC _m_mfcc; /* MFCC instance. */
+
+ /* Actual buffers to be populated. */
+ Array2d<float> _m_mfccBuf; /* Contiguous buffer 1D: MFCC */
+ Array2d<float> _m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
+ Array2d<float> _m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
+
+ uint32_t _m_windowLen; /* Window length for MFCC. */
+ uint32_t _m_windowStride; /* Window stride len for MFCC. */
+ uint32_t _m_numMfccFeats; /* Number of MFCC features per window. */
+ uint32_t _m_numFeatVectors; /* Number of _m_numMfccFeats. */
+ AudioWindow _m_window; /* Sliding window. */
+
+ };
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_ASR_WAV2LET_PREPROC_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/AsrClassifier.cc b/source/use_case/kws_asr/src/AsrClassifier.cc
new file mode 100644
index 0000000..bc86e09
--- /dev/null
+++ b/source/use_case/kws_asr/src/AsrClassifier.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AsrClassifier.hpp"
+
+#include "hal.h"
+#include "TensorFlowLiteMicro.hpp"
+#include "Wav2LetterModel.hpp"
+
+template<typename T>
+bool arm::app::AsrClassifier::_GetTopResults(TfLiteTensor* tensor,
+        std::vector<ClassificationResult>& vecResults,
+        const std::vector <std::string>& labels, double scale, double zeroPoint)
+{
+    /* One row per output element, one column (score) per letter/label. */
+    const uint32_t rowCount = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx];
+    const uint32_t colCount = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx];
+
+    /* NOTE: the calling/public function is expected to have verified the
+     * tensor's size against the labels. */
+    if (colCount < 1) {
+        return false;
+    }
+
+    /* Start from a fresh results container with one entry per row. */
+    vecResults = std::vector<ClassificationResult>(rowCount);
+
+    const T* rowData = tflite::GetTensorData<T>(tensor);
+
+    /* Pick the highest scoring column of every row (first one wins on ties). */
+    for (uint32_t rowIdx = 0; rowIdx < rowCount; ++rowIdx, rowData += colCount) {
+        uint32_t bestIdx = 0;
+        T bestVal = rowData[0];
+
+        for (uint32_t colIdx = 1; colIdx < colCount; ++colIdx) {
+            if (bestVal < rowData[colIdx]) {
+                bestVal = rowData[colIdx];
+                bestIdx = colIdx;
+            }
+        }
+        const double rawScore = static_cast<int> (bestVal);
+        vecResults[rowIdx].m_normalisedVal = scale * (rawScore - zeroPoint);
+        vecResults[rowIdx].m_label = labels[bestIdx];
+        vecResults[rowIdx].m_labelIdx = bestIdx;
+    }
+
+    return true;
+}
+template bool arm::app::AsrClassifier::_GetTopResults<uint8_t>(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, double scale, double zeroPoint);
+template bool arm::app::AsrClassifier::_GetTopResults<int8_t>(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, double scale, double zeroPoint);
+
+bool arm::app::AsrClassifier::GetClassificationResults(
+        TfLiteTensor* outputTensor,
+        std::vector<ClassificationResult>& vecResults,
+        const std::vector <std::string>& labels, uint32_t topNCount)
+{
+    vecResults.clear();
+
+    /* Both the rows and columns indices are used to index the dims array, so
+     * the tensor needs at least (highest index + 1) dimensions. */
+    constexpr int minTensorDims = 1 + static_cast<int>(
+        (arm::app::Wav2LetterModel::ms_outputRowsIdx > arm::app::Wav2LetterModel::ms_outputColsIdx)?
+        arm::app::Wav2LetterModel::ms_outputRowsIdx : arm::app::Wav2LetterModel::ms_outputColsIdx);
+    constexpr uint32_t outColsIdx = arm::app::Wav2LetterModel::ms_outputColsIdx;
+
+    /* Sanity checks. */
+    if (outputTensor == nullptr) {
+        printf_err("Output vector is null pointer.\n");
+        return false;
+    } else if (outputTensor->dims->size < minTensorDims) {
+        printf_err("Output tensor expected to have at least %d dimensions\n", minTensorDims);
+        return false;
+    } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
+        printf_err("Output vectors are smaller than %u\n", topNCount);
+        return false;
+    } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
+        printf_err("Output size doesn't match the labels' size\n");
+        return false;
+    }
+
+    if (topNCount != 1) {
+        warn("TopNCount value ignored in this implementation\n");
+    }
+
+    /* To return the floating point values, we need quantization parameters. */
+    QuantParams quantParams = GetTensorQuantParams(outputTensor);
+
+    bool resultState;
+    switch (outputTensor->type) {
+        case kTfLiteUInt8:
+            resultState = this->_GetTopResults<uint8_t>(
+                            outputTensor, vecResults,
+                            labels, quantParams.scale,
+                            quantParams.offset);
+            break;
+        case kTfLiteInt8:
+            resultState = this->_GetTopResults<int8_t>(
+                            outputTensor, vecResults,
+                            labels, quantParams.scale,
+                            quantParams.offset);
+            break;
+        default:
+            printf_err("Tensor type %s not supported by classifier\n",
+                TfLiteTypeGetName(outputTensor->type));
+            return false;
+    }
+
+    if (!resultState) {
+        printf_err("Failed to get sorted set\n");
+        return false;
+    }
+
+    return true;
+} \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/DsCnnModel.cc b/source/use_case/kws_asr/src/DsCnnModel.cc
new file mode 100644
index 0000000..b573a12
--- /dev/null
+++ b/source/use_case/kws_asr/src/DsCnnModel.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "DsCnnModel.hpp"
+
+#include "hal.h"
+
+namespace arm {
+namespace app {
+namespace kws {
+ extern uint8_t* GetModelPointer();
+ extern size_t GetModelLen();
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
+
+const tflite::MicroOpResolver& arm::app::DsCnnModel::GetOpResolver()
+{
+ return this->_m_opResolver;
+}
+
+bool arm::app::DsCnnModel::EnlistOperations()
+{
+    /* Operators registered with the op resolver for this model. */
+    this->_m_opResolver.AddAveragePool2D();
+    this->_m_opResolver.AddConv2D();
+    this->_m_opResolver.AddDepthwiseConv2D();
+    this->_m_opResolver.AddFullyConnected();
+    this->_m_opResolver.AddRelu();
+    this->_m_opResolver.AddSoftmax();
+    this->_m_opResolver.AddQuantize();
+    this->_m_opResolver.AddDequantize();
+    this->_m_opResolver.AddReshape();
+#if defined(ARM_NPU)
+    if (kTfLiteOk == this->_m_opResolver.AddEthosU()) {
+        info("Added %s support to op resolver\n",
+            tflite::GetString_ETHOSU());
+    } else {
+        printf_err("Failed to add Arm NPU support to op resolver.\n");
+        return false;
+    }
+#endif /* ARM_NPU */
+    return true;
+}
+
+const uint8_t* arm::app::DsCnnModel::ModelPointer()
+{
+ return arm::app::kws::GetModelPointer();
+}
+
+size_t arm::app::DsCnnModel::ModelSize()
+{
+ return arm::app::kws::GetModelLen();
+} \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
new file mode 100644
index 0000000..37146c9
--- /dev/null
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hal.h" /* Brings in platform definitions. */
+#include "InputFiles.hpp" /* For input images. */
+#include "Labels_dscnn.hpp" /* For DS-CNN label strings. */
+#include "Labels_wav2letter.hpp" /* For Wav2Letter label strings. */
+#include "Classifier.hpp" /* KWS classifier. */
+#include "AsrClassifier.hpp" /* ASR classifier. */
+#include "DsCnnModel.hpp" /* KWS model class for running inference. */
+#include "Wav2LetterModel.hpp" /* ASR model class for running inference. */
+#include "UseCaseCommonUtils.hpp" /* Utils functions. */
+#include "UseCaseHandler.hpp" /* Handlers for different user options. */
+#include "Wav2LetterPreprocess.hpp" /* ASR pre-processing class. */
+#include "Wav2LetterPostprocess.hpp"/* ASR post-processing class. */
+
+using KwsClassifier = arm::app::Classifier;
+
+enum opcodes
+{
+ MENU_OPT_RUN_INF_NEXT = 1, /* Run on next vector. */
+ MENU_OPT_RUN_INF_CHOSEN, /* Run on a user provided vector index. */
+ MENU_OPT_RUN_INF_ALL, /* Run inference on all. */
+ MENU_OPT_SHOW_MODEL_INFO, /* Show model info. */
+ MENU_OPT_LIST_AUDIO_CLIPS /* List the current baked audio clips. */
+};
+
+static void DisplayMenu()
+{
+ printf("\n\nUser input required\n");
+ printf("Enter option number from:\n\n");
+ printf(" %u. Classify next audio clip\n", MENU_OPT_RUN_INF_NEXT);
+ printf(" %u. Classify audio clip at chosen index\n", MENU_OPT_RUN_INF_CHOSEN);
+ printf(" %u. Run classification on all audio clips\n", MENU_OPT_RUN_INF_ALL);
+ printf(" %u. Show NN model info\n", MENU_OPT_SHOW_MODEL_INFO);
+ printf(" %u. List audio clips\n\n", MENU_OPT_LIST_AUDIO_CLIPS);
+ printf(" Choice: ");
+}
+
+/** @brief Gets the number of MFCC features for a single window. */
+static uint32_t GetNumMfccFeatures(const arm::app::Model& model);
+
+/** @brief Gets the number of MFCC feature vectors to be computed. */
+static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model);
+
+/** @brief Gets the output context length (left and right) for post-processing. */
+static uint32_t GetOutputContextLen(const arm::app::Model& model,
+ uint32_t inputCtxLen);
+
+/** @brief Gets the output inner length for post-processing. */
+static uint32_t GetOutputInnerLen(const arm::app::Model& model,
+ uint32_t outputCtxLen);
+
+void main_loop(hal_platform& platform)
+{
+    /* Model wrapper objects. */
+    arm::app::DsCnnModel kwsModel;
+    arm::app::Wav2LetterModel asrModel;
+
+    /* Load the models. */
+    if (!kwsModel.Init()) {
+        printf_err("Failed to initialise KWS model\n");
+        return;
+    }
+
+    /* Initialise the asr model using the same allocator from KWS
+     * to re-use the tensor arena. */
+    if (!asrModel.Init(kwsModel.GetAllocator())) {
+        printf_err("Failed to initalise ASR model\n");
+        return;
+    }
+
+    /* Initialise ASR pre-processing. */
+    arm::app::audio::asr::Preprocess prep(
+            GetNumMfccFeatures(asrModel),
+            arm::app::asr::g_FrameLength,
+            arm::app::asr::g_FrameStride,
+            GetNumMfccFeatureVectors(asrModel));
+
+    /* Initialise ASR post-processing. */
+    const uint32_t outputCtxLen = GetOutputContextLen(asrModel, arm::app::asr::g_ctxLen);
+    const uint32_t blankTokenIdx = 28;
+    arm::app::audio::asr::Postprocess postp(
+            outputCtxLen,
+            GetOutputInnerLen(asrModel, outputCtxLen),
+            blankTokenIdx);
+
+    /* Instantiate application context. */
+    arm::app::ApplicationContext caseContext;
+
+    caseContext.Set<hal_platform&>("platform", platform);
+    caseContext.Set<arm::app::Model&>("kwsmodel", kwsModel);
+    caseContext.Set<arm::app::Model&>("asrmodel", asrModel);
+    caseContext.Set<uint32_t>("clipIndex", 0);
+    caseContext.Set<uint32_t>("ctxLen", arm::app::asr::g_ctxLen); /* Left and right context length (MFCC feat vectors). */
+    caseContext.Set<int>("kwsframeLength", arm::app::kws::g_FrameLength);
+    caseContext.Set<int>("kwsframeStride", arm::app::kws::g_FrameStride);
+    caseContext.Set<float>("kwsscoreThreshold", arm::app::kws::g_ScoreThreshold); /* Normalised score threshold. */
+    caseContext.Set<uint32_t >("kwsNumMfcc", arm::app::kws::g_NumMfcc);
+    caseContext.Set<uint32_t >("kwsNumAudioWins", arm::app::kws::g_NumAudioWins);
+
+    caseContext.Set<int>("asrframeLength", arm::app::asr::g_FrameLength);
+    caseContext.Set<int>("asrframeStride", arm::app::asr::g_FrameStride);
+    caseContext.Set<float>("asrscoreThreshold", arm::app::asr::g_ScoreThreshold); /* Normalised score threshold. */
+
+    KwsClassifier kwsClassifier; /* Classifier wrapper object. */
+    arm::app::AsrClassifier asrClassifier; /* Classifier wrapper object. */
+    caseContext.Set<arm::app::Classifier&>("kwsclassifier", kwsClassifier);
+    caseContext.Set<arm::app::AsrClassifier&>("asrclassifier", asrClassifier);
+
+    caseContext.Set<arm::app::audio::asr::Preprocess&>("preprocess", prep);
+    caseContext.Set<arm::app::audio::asr::Postprocess&>("postprocess", postp);
+
+    std::vector<std::string> asrLabels;
+    arm::app::asr::GetLabelsVector(asrLabels);
+    std::vector<std::string> kwsLabels;
+    arm::app::kws::GetLabelsVector(kwsLabels);
+    caseContext.Set<const std::vector <std::string>&>("asrlabels", asrLabels);
+    caseContext.Set<const std::vector <std::string>&>("kwslabels", kwsLabels);
+
+    /* Index of the kws outputs we trigger ASR on. */
+    caseContext.Set<uint32_t>("keywordindex", 2);
+
+    /* Keep going for as long as the handlers succeed and a menu is in use. */
+    bool executionSuccessful = true;
+    constexpr bool bUseMenu = NUMBER_OF_FILES > 1;
+
+    /* With a single input file the menu is skipped and the loop body runs once. */
+    do {
+        int menuOption = MENU_OPT_RUN_INF_NEXT;
+        if (bUseMenu) {
+            DisplayMenu();
+            menuOption = arm::app::ReadUserInputAsInt(platform);
+            printf("\n");
+        }
+        switch (menuOption) {
+            case MENU_OPT_RUN_INF_NEXT:
+                executionSuccessful = ClassifyAudioHandler(
+                        caseContext,
+                        caseContext.Get<uint32_t>("clipIndex"),
+                        false);
+                break;
+            case MENU_OPT_RUN_INF_CHOSEN: {
+                printf(" Enter the audio clip index [0, %d]: ",
+                       NUMBER_OF_FILES-1);
+                auto clipIndex = static_cast<uint32_t>(
+                        arm::app::ReadUserInputAsInt(platform));
+                executionSuccessful = ClassifyAudioHandler(caseContext,
+                                                           clipIndex,
+                                                           false);
+                break;
+            }
+            case MENU_OPT_RUN_INF_ALL:
+                executionSuccessful = ClassifyAudioHandler(
+                        caseContext,
+                        caseContext.Get<uint32_t>("clipIndex"),
+                        true);
+                break;
+            case MENU_OPT_SHOW_MODEL_INFO:
+                executionSuccessful = kwsModel.ShowModelInfoHandler();
+                executionSuccessful = asrModel.ShowModelInfoHandler() && executionSuccessful;
+                break;
+            case MENU_OPT_LIST_AUDIO_CLIPS:
+                executionSuccessful = ListFilesHandler(caseContext);
+                break;
+            default:
+                printf("Incorrect choice, try again.");
+                break;
+        }
+    } while (executionSuccessful && bUseMenu);
+    info("Main loop terminated.\n");
+}
+
+ /**
+  * @brief Derives the number of MFCC features per feature vector from the
+  *        column count of the model's first input tensor.
+  *
+  * The column count is divided by three. If it is not an exact multiple of
+  * three an error is logged, but the truncated quotient is still returned.
+  *
+  * @param[in] model  model whose input tensor is inspected
+  * @return number of MFCC features (never negative)
+  */
+ static uint32_t GetNumMfccFeatures(const arm::app::Model& model)
+ {
+     const TfLiteTensor* const tensor = model.GetInputTensor(0);
+     const int numCols = tensor->dims->data[arm::app::Wav2LetterModel::ms_inputColsIdx];
+
+     if (numCols % 3 != 0) {
+         printf_err("Number of input columns is not a multiple of 3\n");
+     }
+
+     /* Clamp to zero so a negative dimension never wraps in the unsigned return. */
+     const int numFeatures = numCols / 3;
+     return (numFeatures > 0) ? numFeatures : 0;
+ }
+
+ /**
+  * @brief Reads the number of rows of the model's first input tensor, i.e. the
+  *        number of MFCC feature vectors the model consumes per inference.
+  *
+  * @param[in] model  model whose input tensor is inspected
+  * @return row count, clamped to zero if the stored dimension is negative
+  */
+ static uint32_t GetNumMfccFeatureVectors(const arm::app::Model& model)
+ {
+     const int numRows =
+         model.GetInputTensor(0)->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
+     return (numRows < 0) ? 0 : numRows;
+ }
+
+ /**
+  * @brief Scales the input context length to the equivalent number of output
+  *        tensor rows.
+  *
+  * The ratio between input rows and output rows is used to convert the
+  * context length; the result is rounded to the nearest integer.
+  *
+  * @param[in] model        model whose input/output tensors are inspected
+  * @param[in] inputCtxLen  context length expressed in input rows
+  * @return context length expressed in output rows, or 0 (with an error
+  *         logged) when the input rows cannot accommodate the context
+  */
+ static uint32_t GetOutputContextLen(const arm::app::Model& model, const uint32_t inputCtxLen)
+ {
+     const uint32_t numInputRows = GetNumMfccFeatureVectors(model);
+     const uint32_t bothContexts = 2 * inputCtxLen;
+     const uint32_t innerLen = numInputRows - bothContexts;
+
+     /* The input must be strictly larger than both contexts together and
+      * strictly larger than the remaining inner length. */
+     const bool rowsExceedContexts = numInputRows > bothContexts;
+     const bool rowsExceedInner = numInputRows > innerLen;
+     if (!(rowsExceedContexts && rowsExceedInner)) {
+         printf_err("Input rows not compatible with ctx of %u\n",
+             inputCtxLen);
+         return 0;
+     }
+
+     const TfLiteTensor* const outTensor = model.GetOutputTensor(0);
+     const uint32_t numOutputRows = std::max(
+         outTensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx], 0);
+
+     const float inputToOutputRatio =
+         static_cast<float>(numInputRows) / static_cast<float>(numOutputRows);
+
+     return std::round(static_cast<float>(inputCtxLen) / inputToOutputRatio);
+ }
+
+ /**
+  * @brief Computes the number of output rows left once the left and right
+  *        context rows are removed.
+  *
+  * @param[in] model         model whose first output tensor is inspected
+  * @param[in] outputCtxLen  context length expressed in output rows
+  * @return outputRows - 2 * outputCtxLen, or 0 (with an error logged) when
+  *         the two contexts do not fit into the output rows
+  */
+ static uint32_t GetOutputInnerLen(const arm::app::Model& model,
+                                   const uint32_t outputCtxLen)
+ {
+     constexpr uint32_t ms_outputRowsIdx = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+     TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+     const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
+
+     /* Unsigned subtraction would wrap around to a huge value if the two
+      * contexts exceed the available rows. The comparison is written as a
+      * division so that it cannot overflow itself. */
+     if (outputCtxLen > outputRows / 2) {
+         printf_err("Output rows %u not compatible with ctx of %u\n",
+             outputRows, outputCtxLen);
+         return 0;
+     }
+
+     return (outputRows - (2 * outputCtxLen));
+ }
diff --git a/source/use_case/kws_asr/src/OutputDecode.cc b/source/use_case/kws_asr/src/OutputDecode.cc
new file mode 100644
index 0000000..41fbe07
--- /dev/null
+++ b/source/use_case/kws_asr/src/OutputDecode.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "OutputDecode.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+  * @brief Builds the output string from a sequence of classification results.
+  *
+  * Runs of consecutive identical labels are collapsed to a single label, and
+  * the "$" label is never emitted.
+  *
+  * @param[in] vecResults  classification results in sequence order
+  * @return concatenation of the surviving labels
+  */
+ std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults)
+ {
+     std::string decoded;
+     const size_t numResults = vecResults.size();
+
+     for (size_t idx = 0; idx < numResults; ++idx) {
+         const auto& label = vecResults[idx].m_label;
+
+         /* Only the last element of a run of equal labels is considered. */
+         const bool isLastOfRun = (idx + 1 == numResults) ||
+                                  !(label == vecResults[idx + 1].m_label);
+         if (!isLastOfRun) {
+             continue;
+         }
+
+         /* "$" stands for unknown and double characters, so it is dropped. */
+         if (label != "$") {
+             decoded += label;
+         }
+     }
+
+     return decoded;
+ }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/UseCaseHandler.cc b/source/use_case/kws_asr/src/UseCaseHandler.cc
new file mode 100644
index 0000000..c50796f
--- /dev/null
+++ b/source/use_case/kws_asr/src/UseCaseHandler.cc
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "UseCaseHandler.hpp"
+
+#include "hal.h"
+#include "InputFiles.hpp"
+#include "AudioUtils.hpp"
+#include "UseCaseCommonUtils.hpp"
+#include "DsCnnModel.hpp"
+#include "DsCnnMfcc.hpp"
+#include "Classifier.hpp"
+#include "KwsResult.hpp"
+#include "Wav2LetterMfcc.hpp"
+#include "Wav2LetterPreprocess.hpp"
+#include "Wav2LetterPostprocess.hpp"
+#include "AsrResult.hpp"
+#include "AsrClassifier.hpp"
+#include "OutputDecode.hpp"
+
+
+using KwsClassifier = arm::app::Classifier;
+
+namespace arm {
+namespace app {
+
+ /* Named axis indices. Judging by the name these select the reduction axis
+ * of the ASR output tensor - TODO confirm; the enum is not referenced in the
+ * visible part of this file. */
+ enum AsrOutputReductionAxis {
+ AxisRow = 1,
+ AxisCol = 2
+ };
+
+ /* Result of the KWS stage, handed over to the ASR stage. */
+ struct KWSOutput {
+ bool executionSuccess = false; /* Set to true by doKws() once results have been presented. */
+ const int16_t* asrAudioStart = nullptr; /* First audio sample after the window in which the keyword was detected; nullptr if none was. */
+ int32_t asrAudioSamples = 0; /* Number of samples remaining in the clip from asrAudioStart. */
+ };
+
+ /**
+ * @brief Helper function to increment current audio clip index
+ * @param[in,out] ctx pointer to the application context object
+ **/
+ static void _IncrementAppCtxClipIdx(ApplicationContext& ctx);
+
+ /**
+ * @brief Helper function to increment current audio clip index
+ * @param[in,out] ctx pointer to the application context object
+ **/
+ static void _IncrementAppCtxClipIdx(ApplicationContext& ctx);
+
+ /**
+ * @brief Helper function to set the audio clip index
+ * @param[in,out] ctx pointer to the application context object
+ * @param[in] idx value to be set
+ * @return true if index is set, false otherwise
+ **/
+ static bool _SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx);
+
+ /**
+ * @brief Presents kws inference results using the data presentation
+ * object.
+ * @param[in] platform reference to the hal platform object
+ * @param[in] results vector of KWS results to be displayed
+ * @return true if successful, false otherwise
+ **/
+ static bool _PresentInferenceResult(hal_platform& platform, std::vector<arm::app::kws::KwsResult>& results);
+
+ /**
+ * @brief Presents asr inference results using the data presentation
+ * object.
+ * @param[in] platform reference to the hal platform object
+ * @param[in] results vector of ASR results to be displayed
+ * @return true if successful, false otherwise
+ **/
+ static bool _PresentInferenceResult(hal_platform& platform, std::vector<arm::app::asr::AsrResult>& results);
+
+ /**
+ * @brief Returns a function to perform feature calculation and populates input tensor data with
+ * MFCC data.
+ *
+ * Input tensor data type check is performed to choose correct MFCC feature data type.
+ * If tensor has an integer data type then original features are quantised.
+ *
+ * Warning: mfcc calculator provided as input must have the same life scope as returned function.
+ *
+ * @param[in] mfcc MFCC feature calculator.
+ * @param[in,out] inputTensor Input tensor pointer to store calculated features.
+ * @param[in] cacheSize Size of the feature vectors cache (number of feature vectors).
+ *
+ * @return function function to be called providing audio sample and sliding window index.
+ **/
+ static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+ GetFeatureCalculator(audio::DsCnnMFCC& mfcc,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize);
+
+ /**
+  * @brief Performs the KWS pipeline.
+  *
+  * Slides a window over the current audio clip, computes MFCC features for
+  * each window, runs inference and records one result per window. The loop
+  * stops at the first window whose top label index equals the
+  * "keywordindex" value stored in the context.
+  *
+  * @param[in,out] ctx reference to the application context object
+  *
+  * @return KWSOutput struct containing pointer to audio data where ASR should begin
+  *         and how much data to process. `executionSuccess` stays false when the
+  *         model is not initialised, the input tensor is unusable, no feature
+  *         calculator is available or the results could not be presented.
+  */
+ static KWSOutput doKws(ApplicationContext& ctx) {
+     constexpr uint32_t dataPsnTxtInfStartX = 20;
+     constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+     constexpr int minTensorDims = static_cast<int>(
+         (arm::app::DsCnnModel::ms_inputRowsIdx > arm::app::DsCnnModel::ms_inputColsIdx)?
+          arm::app::DsCnnModel::ms_inputRowsIdx : arm::app::DsCnnModel::ms_inputColsIdx);
+
+     KWSOutput output;
+
+     auto& kwsModel = ctx.Get<Model&>("kwsmodel");
+     if (!kwsModel.IsInited()) {
+         printf_err("KWS model has not been initialised\n");
+         return output;
+     }
+
+     const int kwsFrameLength = ctx.Get<int>("kwsframeLength");
+     const int kwsFrameStride = ctx.Get<int>("kwsframeStride");
+     const float kwsScoreThreshold = ctx.Get<float>("kwsscoreThreshold");
+
+     TfLiteTensor* kwsOutputTensor = kwsModel.GetOutputTensor(0);
+     TfLiteTensor* kwsInputTensor = kwsModel.GetInputTensor(0);
+
+     if (!kwsInputTensor->dims) {
+         printf_err("Invalid input tensor dims\n");
+         return output;
+     } else if (kwsInputTensor->dims->size < minTensorDims) {
+         printf_err("Input tensor dimension should be >= %d\n", minTensorDims);
+         return output;
+     }
+
+     const uint32_t kwsNumMfccFeats = ctx.Get<uint32_t>("kwsNumMfcc");
+     const uint32_t kwsNumAudioWindows = ctx.Get<uint32_t>("kwsNumAudioWins");
+
+     audio::DsCnnMFCC kwsMfcc = audio::DsCnnMFCC(kwsNumMfccFeats, kwsFrameLength);
+     kwsMfcc.Init();
+
+     /* Deduce the data length required for 1 KWS inference from the network parameters. */
+     auto kwsAudioDataWindowSize = kwsNumAudioWindows * kwsFrameStride +
+                                   (kwsFrameLength - kwsFrameStride);
+     auto kwsMfccWindowSize = kwsFrameLength;
+     auto kwsMfccWindowStride = kwsFrameStride;
+
+     /* We are choosing to move by half the window size => for a 1 second window size,
+      * this means an overlap of 0.5 seconds. */
+     auto kwsAudioDataStride = kwsAudioDataWindowSize / 2;
+
+     info("KWS audio data window size %u\n", kwsAudioDataWindowSize);
+
+     /* Stride must be multiple of mfcc features window stride to re-use features. */
+     if (0 != kwsAudioDataStride % kwsMfccWindowStride) {
+         kwsAudioDataStride -= kwsAudioDataStride % kwsMfccWindowStride;
+     }
+
+     auto kwsMfccVectorsInAudioStride = kwsAudioDataStride/kwsMfccWindowStride;
+
+     /* We expect to be sampling 1 second worth of data at a time
+      * NOTE: This is only used for time stamp calculation. */
+     const float kwsAudioParamsSecondsPerSample = 1.0/audio::DsCnnMFCC::ms_defaultSamplingFreq;
+
+     auto currentIndex = ctx.Get<uint32_t>("clipIndex");
+
+     /* Creating a mfcc features sliding window for the data required for 1 inference. */
+     auto kwsAudioMFCCWindowSlider = audio::SlidingWindow<const int16_t>(
+             get_audio_array(currentIndex),
+             kwsAudioDataWindowSize, kwsMfccWindowSize,
+             kwsMfccWindowStride);
+
+     /* Creating a sliding window through the whole audio clip. */
+     auto audioDataSlider = audio::SlidingWindow<const int16_t>(
+             get_audio_array(currentIndex),
+             get_audio_array_size(currentIndex),
+             kwsAudioDataWindowSize, kwsAudioDataStride);
+
+     /* Calculate number of the feature vectors in the window overlap region.
+      * These feature vectors will be reused.*/
+     size_t numberOfReusedFeatureVectors = kwsAudioMFCCWindowSlider.TotalStrides() + 1
+                                           - kwsMfccVectorsInAudioStride;
+
+     auto kwsMfccFeatureCalc = GetFeatureCalculator(kwsMfcc, kwsInputTensor,
+                                                    numberOfReusedFeatureVectors);
+
+     /* An empty function object means the tensor type is not supported. */
+     if (!kwsMfccFeatureCalc){
+         return output;
+     }
+
+     /* Container for KWS results. */
+     std::vector<arm::app::kws::KwsResult> kwsResults;
+
+     /* Display message on the LCD - inference running. */
+     auto& platform = ctx.Get<hal_platform&>("platform");
+     std::string str_inf{"Running KWS inference... "};
+     platform.data_psn->present_data_text(
+                         str_inf.c_str(), str_inf.size(),
+                         dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+     info("Running KWS inference on audio clip %u => %s\n",
+          currentIndex, get_filename(currentIndex));
+
+     /* Start sliding through audio clip. */
+     while (audioDataSlider.HasNext()) {
+         const int16_t* inferenceWindow = audioDataSlider.Next();
+
+         /* We moved to the next window - set the features sliding to the new address. */
+         kwsAudioMFCCWindowSlider.Reset(inferenceWindow);
+
+         /* The first window does not have cache ready. */
+         bool useCache = audioDataSlider.Index() > 0 && numberOfReusedFeatureVectors > 0;
+
+         /* Start calculating features inside one audio sliding window. */
+         while (kwsAudioMFCCWindowSlider.HasNext()) {
+             const int16_t* kwsMfccWindow = kwsAudioMFCCWindowSlider.Next();
+             std::vector<int16_t> kwsMfccAudioData =
+                 std::vector<int16_t>(kwsMfccWindow, kwsMfccWindow + kwsMfccWindowSize);
+
+             /* Compute features for this window and write them to input tensor. */
+             kwsMfccFeatureCalc(kwsMfccAudioData,
+                                kwsAudioMFCCWindowSlider.Index(),
+                                useCache,
+                                kwsMfccVectorsInAudioStride);
+         }
+
+         info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
+              audioDataSlider.TotalStrides() + 1);
+
+         /* Run inference over this audio clip sliding window. */
+         arm::app::RunInference(platform, kwsModel);
+
+         std::vector<ClassificationResult> kwsClassificationResult;
+         auto& kwsClassifier = ctx.Get<KwsClassifier&>("kwsclassifier");
+
+         kwsClassifier.GetClassificationResults(
+                         kwsOutputTensor, kwsClassificationResult,
+                         ctx.Get<std::vector<std::string>&>("kwslabels"), 1);
+
+         kwsResults.emplace_back(
+             kws::KwsResult(
+                 kwsClassificationResult,
+                 audioDataSlider.Index() * kwsAudioParamsSecondsPerSample * kwsAudioDataStride,
+                 audioDataSlider.Index(), kwsScoreThreshold)
+             );
+
+         /* Keyword detected. The classifier may leave the result vector empty,
+          * so check before indexing into it. */
+         if (!kwsClassificationResult.empty() &&
+             kwsClassificationResult[0].m_labelIdx == ctx.Get<uint32_t>("keywordindex")) {
+             /* ASR starts right after the window in which the keyword was found. */
+             output.asrAudioStart = inferenceWindow + kwsAudioDataWindowSize;
+             output.asrAudioSamples = get_audio_array_size(currentIndex) -
+                                      (audioDataSlider.NextWindowStartIndex() -
+                                       kwsAudioDataStride + kwsAudioDataWindowSize);
+             break;
+         }
+
+#if VERIFY_TEST_OUTPUT
+         arm::app::DumpTensor(kwsOutputTensor);
+#endif /* VERIFY_TEST_OUTPUT */
+
+     } /* while (audioDataSlider.HasNext()) */
+
+     /* Erase. */
+     str_inf = std::string(str_inf.size(), ' ');
+     platform.data_psn->present_data_text(
+                         str_inf.c_str(), str_inf.size(),
+                         dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+     if (!_PresentInferenceResult(platform, kwsResults)) {
+         return output;
+     }
+
+     output.executionSuccess = true;
+     return output;
+ }
+
+ /**
+  * @brief Performs the ASR pipeline.
+  *
+  * Slides a window over the audio that follows the detected keyword, runs
+  * pre-processing, inference and post-processing per window, and presents
+  * the decoded text.
+  *
+  * @param[in,out] ctx        reference to the application context object
+  * @param[in]     kwsOutput  struct containing pointer to audio data where ASR should begin
+  *                           and how much data to process
+  * @return bool true if pipeline executed without failure
+  */
+ static bool doAsr(ApplicationContext& ctx, const KWSOutput& kwsOutput) {
+     constexpr uint32_t dataPsnTxtInfStartX = 20;
+     constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+     auto& platform = ctx.Get<hal_platform&>("platform");
+     platform.data_psn->clear(COLOR_BLACK);
+
+     /* Get model reference. */
+     auto& asrModel = ctx.Get<Model&>("asrmodel");
+     if (!asrModel.IsInited()) {
+         printf_err("ASR model has not been initialised\n");
+         return false;
+     }
+
+     /* Get score threshold to be applied for the classifier (post-inference). */
+     auto asrScoreThreshold = ctx.Get<float>("asrscoreThreshold");
+
+     /* Dimensions of the tensor should have been verified by the caller. */
+     TfLiteTensor* asrInputTensor = asrModel.GetInputTensor(0);
+     TfLiteTensor* asrOutputTensor = asrModel.GetOutputTensor(0);
+     const uint32_t asrInputRows = asrInputTensor->dims->data[arm::app::Wav2LetterModel::ms_inputRowsIdx];
+
+     /* Populate ASR MFCC related parameters. These attributes are stored in
+      * the context as `int`, so they are read back with the same type and
+      * converted explicitly. */
+     const auto asrMfccParamsWinLen = static_cast<uint32_t>(ctx.Get<int>("asrframeLength"));
+     const auto asrMfccParamsWinStride = static_cast<uint32_t>(ctx.Get<int>("asrframeStride"));
+
+     /* Populate ASR inference context and inner lengths for input. */
+     auto asrInputCtxLen = ctx.Get<uint32_t>("ctxLen");
+     const uint32_t asrInputInnerLen = asrInputRows - (2 * asrInputCtxLen);
+
+     /* Make sure the input tensor supports the above context and inner lengths. */
+     if (asrInputRows <= 2 * asrInputCtxLen || asrInputRows <= asrInputInnerLen) {
+         printf_err("ASR input rows not compatible with ctx length %u\n", asrInputCtxLen);
+         return false;
+     }
+
+     /* Audio data stride corresponds to inputInnerLen feature vectors. */
+     const uint32_t asrAudioParamsWinLen = (asrInputRows - 1) *
+                                           asrMfccParamsWinStride + (asrMfccParamsWinLen);
+     const uint32_t asrAudioParamsWinStride = asrInputInnerLen * asrMfccParamsWinStride;
+     const float asrAudioParamsSecondsPerSample =
+         (1.0/audio::Wav2LetterMFCC::ms_defaultSamplingFreq);
+
+     /* Get pre/post-processing objects */
+     auto& asrPrep = ctx.Get<audio::asr::Preprocess&>("preprocess");
+     auto& asrPostp = ctx.Get<audio::asr::Postprocess&>("postprocess");
+
+     /* Set default reduction axis for post-processing. */
+     const uint32_t reductionAxis = arm::app::Wav2LetterModel::ms_outputRowsIdx;
+
+     /* Get the remaining audio buffer and respective size from KWS results. */
+     const int16_t* audioArr = kwsOutput.asrAudioStart;
+     const uint32_t audioArrSize = kwsOutput.asrAudioSamples;
+
+     /* Audio clip must have enough samples to produce 1 MFCC feature.
+      * Checked before copying so no buffer is allocated for a clip that is rejected. */
+     if (audioArrSize < asrMfccParamsWinLen) {
+         printf_err("Not enough audio samples, minimum needed is %u\n", asrMfccParamsWinLen);
+         return false;
+     }
+     std::vector<int16_t> audioBuffer = std::vector<int16_t>(audioArr, audioArr + audioArrSize);
+
+     /* Initialise an audio slider. */
+     auto audioDataSlider = audio::ASRSlidingWindow<const int16_t>(
+             audioBuffer.data(),
+             audioBuffer.size(),
+             asrAudioParamsWinLen,
+             asrAudioParamsWinStride);
+
+     /* Declare a container for results. */
+     std::vector<arm::app::asr::AsrResult> asrResults;
+
+     /* Display message on the LCD - inference running. */
+     std::string str_inf{"Running ASR inference... "};
+     platform.data_psn->present_data_text(
+             str_inf.c_str(), str_inf.size(),
+             dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+     size_t asrInferenceWindowLen = asrAudioParamsWinLen;
+
+     /* Start sliding through audio clip. */
+     while (audioDataSlider.HasNext()) {
+
+         /* If not enough audio see how much can be sent for processing. */
+         size_t nextStartIndex = audioDataSlider.NextWindowStartIndex();
+         if (nextStartIndex + asrAudioParamsWinLen > audioBuffer.size()) {
+             asrInferenceWindowLen = audioBuffer.size() - nextStartIndex;
+         }
+
+         const int16_t* asrInferenceWindow = audioDataSlider.Next();
+
+         info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
+              static_cast<size_t>(ceilf(audioDataSlider.FractionalTotalStrides() + 1)));
+
+         Profiler prepProfiler{&platform, "pre-processing"};
+         prepProfiler.StartProfiling();
+
+         /* Calculate MFCCs, deltas and populate the input tensor. */
+         asrPrep.Invoke(asrInferenceWindow, asrInferenceWindowLen, asrInputTensor);
+
+         prepProfiler.StopProfiling();
+         std::string prepProfileResults = prepProfiler.GetResultsAndReset();
+         info("%s\n", prepProfileResults.c_str());
+
+         /* Run inference over this audio clip sliding window. */
+         arm::app::RunInference(platform, asrModel);
+
+         /* Post-process. The last argument flags the final iteration. */
+         asrPostp.Invoke(asrOutputTensor, reductionAxis, !audioDataSlider.HasNext());
+
+         /* Get results. */
+         std::vector<ClassificationResult> asrClassificationResult;
+         auto& asrClassifier = ctx.Get<AsrClassifier&>("asrclassifier");
+         asrClassifier.GetClassificationResults(
+                 asrOutputTensor, asrClassificationResult,
+                 ctx.Get<std::vector<std::string>&>("asrlabels"), 1);
+
+         asrResults.emplace_back(asr::AsrResult(asrClassificationResult,
+                                                (audioDataSlider.Index() *
+                                                 asrAudioParamsSecondsPerSample *
+                                                 asrAudioParamsWinStride),
+                                                audioDataSlider.Index(), asrScoreThreshold));
+
+#if VERIFY_TEST_OUTPUT
+         arm::app::DumpTensor(asrOutputTensor, asrOutputTensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx]);
+#endif /* VERIFY_TEST_OUTPUT */
+
+         /* Erase */
+         str_inf = std::string(str_inf.size(), ' ');
+         platform.data_psn->present_data_text(
+                 str_inf.c_str(), str_inf.size(),
+                 dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
+     }
+     if (!_PresentInferenceResult(platform, asrResults)) {
+         return false;
+     }
+
+     return true;
+ }
+
+ /* Audio inference classification handler. */
+ /**
+  * @brief Runs KWS on one or all audio clips and, when the keyword is
+  *        spotted, runs ASR on the remainder of that clip.
+  *
+  * @param[in,out] ctx        application context
+  * @param[in]     clipIndex  clip to start from; values outside the valid
+  *                           range leave the context's current index untouched
+  * @param[in]     runAll     when true, keep going until the clip index wraps
+  *                           back to the starting clip
+  * @return true if every executed stage succeeded, false otherwise
+  */
+ bool ClassifyAudioHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
+ {
+     auto& platform = ctx.Get<hal_platform&>("platform");
+     platform.data_psn->clear(COLOR_BLACK);
+
+     /* Only a valid index is written to the context. */
+     if ((clipIndex < NUMBER_OF_FILES) && !_SetAppCtxClipIdx(ctx, clipIndex)) {
+         return false;
+     }
+
+     const auto firstClipIdx = ctx.Get<uint32_t>("clipIndex");
+     bool moreClips = true;
+
+     while (moreClips) {
+         const KWSOutput kwsOutput = doKws(ctx);
+         if (!kwsOutput.executionSuccess) {
+             return false;
+         }
+
+         const bool keywordFound = (kwsOutput.asrAudioStart != nullptr) &&
+                                   (kwsOutput.asrAudioSamples > 0);
+         if (keywordFound) {
+             info("Keyword spotted\n");
+             if (!doAsr(ctx, kwsOutput)) {
+                 printf_err("ASR failed");
+                 return false;
+             }
+         }
+
+         _IncrementAppCtxClipIdx(ctx);
+
+         /* Stop after one clip, or once the index is back at the start. */
+         moreClips = runAll && (ctx.Get<uint32_t>("clipIndex") != firstClipIdx);
+     }
+
+     return true;
+ }
+
+ /* Advances the "clipIndex" context value by one, wrapping to 0 after the last clip. */
+ static void _IncrementAppCtxClipIdx(ApplicationContext& ctx)
+ {
+     const auto nextIdx = ctx.Get<uint32_t>("clipIndex") + 1;
+     const bool pastLastClip = nextIdx >= NUMBER_OF_FILES;
+
+     ctx.Set<uint32_t>("clipIndex", pastLastClip ? 0 : nextIdx);
+ }
+
+ /* Stores idx as the "clipIndex" context value if it addresses an existing
+  * clip; otherwise logs an error and leaves the context unchanged. */
+ static bool _SetAppCtxClipIdx(ApplicationContext& ctx, const uint32_t idx)
+ {
+     const bool idxInRange = idx < NUMBER_OF_FILES;
+
+     if (idxInRange) {
+         ctx.Set<uint32_t>("clipIndex", idx);
+     } else {
+         printf_err("Invalid idx %u (expected less than %u)\n",
+                    idx, NUMBER_OF_FILES);
+     }
+
+     return idxInRange;
+ }
+
+ /* Presents one line per KWS inference ("@<time>s: <label> (<score>%)") on
+  * the display and logs every label/score pair. Always returns true. */
+ static bool _PresentInferenceResult(hal_platform& platform,
+                                     std::vector<arm::app::kws::KwsResult>& results)
+ {
+     constexpr uint32_t textStartX = 20;
+     constexpr uint32_t textStartY = 30;
+     constexpr uint32_t rowHeight = 16; /* Row index increment. */
+
+     platform.data_psn->set_text_color(COLOR_GREEN);
+
+     /* The first result is drawn two rows below the start position. */
+     uint32_t rowY = textStartY + 2 * rowHeight;
+
+     for (auto& result : results) {
+         const auto& entries = result.m_resultVec;
+
+         /* Defaults used when an inference produced no classification. */
+         std::string topKeyword{"<none>"};
+         float score = 0.f;
+         if (!entries.empty()) {
+             topKeyword = entries[0].m_label;
+             score = entries[0].m_normalisedVal;
+         }
+
+         std::string resultStr{"@"};
+         resultStr += std::to_string(result.m_timeStamp);
+         resultStr += "s: ";
+         resultStr += topKeyword;
+         resultStr += " (";
+         resultStr += std::to_string(static_cast<int>(score * 100));
+         resultStr += "%)";
+
+         platform.data_psn->present_data_text(
+                 resultStr.c_str(), resultStr.size(),
+                 textStartX, rowY, 0);
+         rowY += rowHeight;
+
+         info("For timestamp: %f (inference #: %u); threshold: %f\n",
+              result.m_timeStamp, result.m_inferenceNumber,
+              result.m_threshold);
+
+         uint32_t labelPos = 0;
+         for (const auto& entry : entries) {
+             info("\t\tlabel @ %u: %s, score: %f\n", labelPos,
+                  entry.m_label.c_str(),
+                  entry.m_normalisedVal);
+             ++labelPos;
+         }
+     }
+
+     return true;
+ }
+
+ /* Logs the decoded text of every ASR inference, then decodes the
+  * concatenation of all inferences and presents it on the display.
+  * Always returns true. */
+ static bool _PresentInferenceResult(hal_platform& platform, std::vector<arm::app::asr::AsrResult>& results)
+ {
+     constexpr uint32_t textStartX = 20;
+     constexpr uint32_t textStartY = 80;
+     constexpr bool allowMultipleLines = true;
+
+     platform.data_psn->set_text_color(COLOR_GREEN);
+
+     /* Results from multiple inferences are combined before the final decode. */
+     std::vector<arm::app::ClassificationResult> allResults;
+
+     for (auto& inference : results) {
+         allResults.insert(allResults.end(),
+                           inference.m_resultVec.begin(),
+                           inference.m_resultVec.end());
+
+         /* Decode this inference on its own for the log. */
+         const std::string partialStr = audio::asr::DecodeOutput(inference.m_resultVec);
+         info("Result for inf %u: %s\n", inference.m_inferenceNumber,
+              partialStr.c_str());
+     }
+
+     const std::string finalStr = audio::asr::DecodeOutput(allResults);
+
+     platform.data_psn->present_data_text(
+             finalStr.c_str(), finalStr.size(),
+             textStartX, textStartY, allowMultipleLines);
+
+     info("Final result: %s\n", finalStr.c_str());
+     return true;
+ }
+
+ /**
+ * @brief Generic feature calculator factory.
+ *
+ * Returns lambda function to compute features using features cache.
+ * Real features math is done by a lambda function provided as a parameter.
+ * Features are written to input tensor memory.
+ *
+ * NOTE: the cache is a function-local static, so there is one cache per
+ * instantiation of T, shared by every lambda this function returns. It is
+ * sized from cacheSize on the first call only; later calls do not resize it.
+ *
+ * @tparam T feature vector type.
+ * @param inputTensor model input tensor pointer.
+ * @param cacheSize number of feature vectors to cache. Defined by the sliding window overlap.
+ * @param compute features calculator function.
+ * @return lambda function to compute features. Its arguments are the audio
+ * window, the feature vector index, whether cached features may be
+ * used, and the index from which features are stored into the cache.
+ **/
+ template<class T>
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
+ _FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+ std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+ {
+ /* Feature cache used by the lambda below. Being static it is referenced
+ * directly rather than captured, and it persists across calls. */
+ static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
+
+ return [=](std::vector<int16_t>& audioDataWindow,
+ size_t index,
+ bool useCache,
+ size_t featuresOverlapIndex)
+ {
+ T* tensorData = tflite::GetTensorData<T>(inputTensor);
+ std::vector<T> features;
+
+ /* Reuse features from cache if cache is ready and sliding windows overlap.
+ * Overlap is in the beginning of sliding window with a size of a feature cache.
+ */
+ if (useCache && index < featureCache.size()) {
+ /* The cached vector is moved out, so this slot is stale until it is refreshed below. */
+ features = std::move(featureCache[index]);
+ } else {
+ features = std::move(compute(audioDataWindow));
+ }
+ /* Copy the feature vector into position `index` of the input tensor. */
+ auto size = features.size();
+ auto sizeBytes = sizeof(T) * size;
+ std::memcpy(tensorData + (index * size), features.data(), sizeBytes);
+
+ /* Start renewing cache as soon iteration goes out of the windows overlap. */
+ if (index >= featuresOverlapIndex) {
+ /* NOTE(review): assumes index - featuresOverlapIndex < featureCache.size(); not checked here. */
+ featureCache[index - featuresOverlapIndex] = std::move(features);
+ }
+ };
+ }
+
+ /* Explicit instantiations of _FeatureCalc for the int8_t, uint8_t, int16_t
+ * and float feature types. */
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+ _FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int8_t> (std::vector<int16_t>& )> compute);
+
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+ _FeatureCalc<uint8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<uint8_t> (std::vector<int16_t>& )> compute);
+
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
+ _FeatureCalc<int16_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int16_t> (std::vector<int16_t>& )> compute);
+
+ template std::function<void(std::vector<int16_t>&, size_t, bool, size_t)>
+ _FeatureCalc<float>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<float>(std::vector<int16_t>&)> compute);
+
+
+ /* Selects the MFCC feature calculator matching the input tensor.
+  *
+  * For an affine-quantised tensor the features are quantised to the tensor's
+  * element type (int8, uint8 or int16) using the first scale/zero-point pair;
+  * otherwise float features are produced. For an unsupported quantised type
+  * an error is logged and an empty function object is returned, which the
+  * caller must test for.
+  *
+  * The returned function holds a reference to `mfcc`, so `mfcc` must outlive it. */
+ static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+ GetFeatureCalculator(audio::DsCnnMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
+ {
+     std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
+
+     TfLiteQuantization quant = inputTensor->quantization;
+
+     if (kTfLiteAffineQuantization == quant.type) {
+
+         auto* quantParams = static_cast<TfLiteAffineQuantization*>(quant.params);
+         const float quantScale = quantParams->scale->data[0];
+         const int quantOffset = quantParams->zero_point->data[0];
+
+         switch (inputTensor->type) {
+             case kTfLiteInt8: {
+                 mfccFeatureCalc = _FeatureCalc<int8_t>(inputTensor,
+                                                        cacheSize,
+                                                        [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                            return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
+                                                                                                 quantScale,
+                                                                                                 quantOffset);
+                                                        }
+                 );
+                 break;
+             }
+             case kTfLiteUInt8: {
+                 mfccFeatureCalc = _FeatureCalc<uint8_t>(inputTensor,
+                                                         cacheSize,
+                                                         [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                             return mfcc.MfccComputeQuant<uint8_t>(audioDataWindow,
+                                                                                                   quantScale,
+                                                                                                   quantOffset);
+                                                         }
+                 );
+                 break;
+             }
+             case kTfLiteInt16: {
+                 mfccFeatureCalc = _FeatureCalc<int16_t>(inputTensor,
+                                                         cacheSize,
+                                                         [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                             return mfcc.MfccComputeQuant<int16_t>(audioDataWindow,
+                                                                                                   quantScale,
+                                                                                                   quantOffset);
+                                                         }
+                 );
+                 break;
+             }
+             default:
+                 /* mfccFeatureCalc stays empty. */
+                 printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+         }
+
+     } else {
+         mfccFeatureCalc = _FeatureCalc<float>(inputTensor,
+                                               cacheSize,
+                                               [&mfcc](std::vector<int16_t>& audioDataWindow) {
+                                                   return mfcc.MfccCompute(audioDataWindow);
+                                               });
+     }
+     return mfccFeatureCalc;
+ }
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterMfcc.cc b/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
new file mode 100644
index 0000000..80e4a26
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterMfcc.hpp"
+
+#include "PlatformMath.hpp"
+
+#include <cfloat>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /**
+  * @brief Projects an FFT vector onto the mel filter bank.
+  *
+  * One energy is produced per entry of melEnergies. For bank b, the weights
+  * in melFilterBank[b] are applied, in order, to the FFT bins
+  * filterBankFilterFirst[b] .. filterBankFilterLast[b] (inclusive).
+  *
+  * @return false if the first/last index vectors do not have one entry per
+  *         bank, true otherwise.
+  */
+ bool Wav2LetterMFCC::ApplyMelFilterBank(
+         std::vector<float>& fftVec,
+         std::vector<std::vector<float>>& melFilterBank,
+         std::vector<int32_t>& filterBankFilterFirst,
+         std::vector<int32_t>& filterBankFilterLast,
+         std::vector<float>& melEnergies)
+ {
+     const size_t numBanks = melEnergies.size();
+
+     const bool lengthsAgree = (filterBankFilterFirst.size() == numBanks) &&
+                               (filterBankFilterLast.size() == numBanks);
+     if (!lengthsAgree) {
+         printf_err("unexpected filter bank lengths\n");
+         return false;
+     }
+
+     for (size_t bin = 0; bin < numBanks; ++bin) {
+         const std::vector<float>& weights = melFilterBank[bin];
+         const int32_t firstIndex = filterBankFilterFirst[bin];
+         const int32_t lastIndex = filterBankFilterLast[bin];
+
+         /* Start from a tiny positive value so a later log never sees zero
+          * (same value used in librosa). */
+         float accumulated = 1e-10;
+
+         size_t weightIdx = 0;
+         for (int32_t fftIdx = firstIndex; fftIdx <= lastIndex; ++fftIdx, ++weightIdx) {
+             accumulated += (weights[weightIdx] * fftVec[fftIdx]);
+         }
+
+         melEnergies[bin] = accumulated;
+     }
+
+     return true;
+ }
+
+ /**
+  * @brief Converts mel energies, in place, to a clamped 10*log10 scale.
+  *
+  * Natural logs are taken for the whole vector and rescaled to 10*log10.
+  * Every value is then clamped so that none is more than 80 below the
+  * largest value found.
+  */
+ void Wav2LetterMFCC::ConvertToLogarithmicScale(
+         std::vector<float>& melEnergies)
+ {
+     /* ln(x) * log10(e) == log10(x); wav2letter additionally scales by 10. */
+     constexpr float multiplier = 10.0 *              /* Default scalar. */
+                                  0.4342944819032518; /* log10f(std::exp(1.0))*/
+     constexpr float maxDb = 80.0;
+
+     /* Scratch buffer receiving the natural logarithms. */
+     std::vector <float> naturalLogs(melEnergies.size(), 0.f);
+     math::MathUtils::VecLogarithmF32(melEnergies, naturalLogs);
+
+     /* Rescale and track the peak in a single pass. */
+     float peak = -FLT_MAX;
+     auto logIter = naturalLogs.begin();
+     for (float& energy : melEnergies) {
+         energy = *logIter * multiplier;
+         ++logIter;
+
+         if (energy > peak) {
+             peak = energy;
+         }
+     }
+
+     /* Clamp everything below (peak - maxDb). */
+     const float lowerBound = peak - maxDb;
+     for (float& energy : melEnergies) {
+         energy = std::max(energy, lowerBound);
+     }
+ }
+
+ /**
+  * @brief Builds a flattened DCT matrix with coefficientCount rows of
+  *        inputLength values each (row k starts at index k * inputLength).
+  *
+  * Row 0 is filled with the constant normalizerK0; rows 1 onwards hold
+  * normalizer * cos((n + 0.5) * k * pi / inputLength).
+  *
+  * @param[in] inputLength        Number of values per row.
+  * @param[in] coefficientCount   Number of rows to generate.
+  * @return    Vector of inputLength * coefficientCount floats.
+  */
+ std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
+ const int32_t inputLength,
+ const int32_t coefficientCount)
+ {
+ std::vector<float> dctMatix(inputLength * coefficientCount);
+
+ /* Orthonormal normalization. */
+ const float normalizerK0 = 2 * math::MathUtils::SqrtF32(1.0f /
+ static_cast<float>(4*inputLength));
+ const float normalizer = 2 * math::MathUtils::SqrtF32(1.0f /
+ static_cast<float>(2*inputLength));
+
+ /* The angle for row k is k * angleIncr; it is accumulated by repeated
+  * addition at the end of each row rather than recomputed. */
+ const float angleIncr = M_PI/inputLength;
+ float angle = angleIncr; /* We start using it at k = 1 loop. */
+
+ /* First row of DCT will use normalizer K0 */
+ for (int32_t n = 0; n < inputLength; ++n) {
+ dctMatix[n] = normalizerK0 /* cos(0) = 1 */;
+ }
+
+ /* Second row (index = 1) onwards, we use standard normalizer. */
+ /* m is the flat offset of row k, advanced by one row length per iteration. */
+ for (int32_t k = 1, m = inputLength; k < coefficientCount; ++k, m += inputLength) {
+ for (int32_t n = 0; n < inputLength; ++n) {
+ dctMatix[m+n] = normalizer *
+ math::MathUtils::CosineF32((n + 0.5f) * angle);
+ }
+ angle += angleIncr;
+ }
+ return dctMatix;
+ }
+
+ /**
+  * @brief Slaney-style normaliser for a mel filter.
+  *
+  * Both mel values are mapped through MFCC::InverseMelScale and the result
+  * is 2 divided by the difference (right minus left).
+  */
+ float Wav2LetterMFCC::GetMelFilterBankNormaliser(
+         const float& leftMel,
+         const float& rightMel,
+         const bool useHTKMethod)
+ {
+     const float upperEdge = MFCC::InverseMelScale(rightMel, useHTKMethod);
+     const float lowerEdge = MFCC::InverseMelScale(leftMel, useHTKMethod);
+
+     /* Slaney normalization for mel weights. */
+     return (2.0f / (upperEdge - lowerEdge));
+ }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/Wav2LetterModel.cc b/source/use_case/kws_asr/src/Wav2LetterModel.cc
new file mode 100644
index 0000000..2114a3f
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterModel.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterModel.hpp"
+
+#include "hal.h"
+
+namespace arm {
+namespace app {
+namespace asr {
+ /* Accessors for the ASR model data. They are only declared here; the
+  * definitions live in another translation unit. */
+ extern uint8_t* GetModelPointer();
+ extern size_t GetModelLen();
+}
+} /* namespace app */
+} /* namespace arm */
+
+/* Returns a reference to this model's operator resolver member. */
+const tflite::MicroOpResolver& arm::app::Wav2LetterModel::GetOpResolver()
+{
+ return this->_m_opResolver;
+}
+
+/**
+ * @brief Registers the operators this model needs with the op resolver.
+ *
+ * Conv2D, Mul, Maximum and Reshape are always added. When ARM_NPU is
+ * defined, the Ethos-U operator is added as well.
+ *
+ * @return false if ARM_NPU is defined and the Ethos-U operator could not
+ *         be added; true otherwise.
+ */
+bool arm::app::Wav2LetterModel::EnlistOperations()
+{
+    this->_m_opResolver.AddConv2D();
+    this->_m_opResolver.AddMul();
+    this->_m_opResolver.AddMaximum();
+    this->_m_opResolver.AddReshape();
+
+#if defined(ARM_NPU)
+    if (kTfLiteOk == this->_m_opResolver.AddEthosU()) {
+        info("Added %s support to op resolver\n",
+            tflite::GetString_ETHOSU());
+    } else {
+        /* Terminate the message with a newline, like every other log line. */
+        printf_err("Failed to add Arm NPU support to op resolver.\n");
+        return false;
+    }
+#endif /* ARM_NPU */
+    return true;
+}
+
+/* Returns the pointer provided by arm::app::asr::GetModelPointer(). */
+const uint8_t* arm::app::Wav2LetterModel::ModelPointer()
+{
+ return arm::app::asr::GetModelPointer();
+}
+
+/* Returns the length reported by arm::app::asr::GetModelLen(). */
+size_t arm::app::Wav2LetterModel::ModelSize()
+{
+ return arm::app::asr::GetModelLen();
+} \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
new file mode 100644
index 0000000..b173968
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPostprocess.hpp"
+
+#include "Wav2LetterModel.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+  * @brief Constructs the post-processing helper.
+  *
+  * @param[in] contextLen      Length of the left and of the right context.
+  * @param[in] innerLen        Length of the section between the contexts.
+  * @param[in] blankTokenIdx   Index written as the blank token when a
+  *                            context section is erased.
+  */
+ Postprocess::Postprocess(const uint32_t contextLen,
+                          const uint32_t innerLen,
+                          const uint32_t blankTokenIdx)
+     : _m_contextLen(contextLen),
+       _m_innerLen(innerLen),
+       /* Computed from the parameters, not from sibling members, so the
+        * value does not depend on the member declaration order. */
+       _m_totalLen(2 * contextLen + innerLen),
+       _m_countIterations(0),
+       _m_blankTokenIdx(blankTokenIdx)
+ {}
+
+ /**
+  * @brief Erases the context sections of an output tensor along one axis.
+  *
+  * Only Wav2LetterModel::ms_outputRowsIdx is supported as the axis.
+  *
+  * @param[in,out] tensor          Output tensor to modify in place.
+  * @param[in]     axisIdx         Axis along which the sections are erased.
+  * @param[in]     lastIteration   Forwarded to _EraseSectionsRowWise.
+  * @return true on success; false if validation fails, or if the tensor
+  *         type or the axis is not supported.
+  */
+ bool Postprocess::Invoke(TfLiteTensor* tensor,
+                          const uint32_t axisIdx,
+                          const bool lastIteration)
+ {
+     /* Basic checks. */
+     if (!this->_IsInputValid(tensor, axisIdx)) {
+         return false;
+     }
+
+     /* Element width in bytes; zero means the type is not handled. */
+     const uint32_t bytesPerElem = this->_GetTensorElementSize(tensor);
+     if (0 == bytesPerElem) {
+         printf_err("Tensor type not supported for post processing\n");
+         return false;
+     }
+
+     if (bytesPerElem * this->_m_totalLen > tensor->bytes) {
+         printf_err("Insufficient number of tensor bytes\n");
+         return false;
+     }
+
+     /* Whatever the tensor type, the data is addressed as raw bytes. */
+     uint8_t* rawData = tflite::GetTensorData<uint8_t>(tensor);
+
+     /* Dispatch on the requested axis. */
+     switch (axisIdx) {
+         case arm::app::Wav2LetterModel::ms_outputRowsIdx: {
+             const uint32_t rowBytes =
+                 bytesPerElem * tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx];
+             return this->_EraseSectionsRowWise(rawData, rowBytes, lastIteration);
+         }
+         default:
+             printf_err("Unsupported axis index: %u\n", axisIdx);
+             return false;
+     }
+ }
+
+ /**
+  * @brief Checks that a tensor can be post-processed along the given axis.
+  *
+  * @param[in] tensor    Tensor to validate.
+  * @param[in] axisIdx   Axis whose length must equal the expected total
+  *                      length (_m_totalLen).
+  * @return true if the tensor and its dimension data are non-null, the axis
+  *         exists, and its length matches; false otherwise.
+  */
+ bool Postprocess::_IsInputValid(TfLiteTensor* tensor,
+                                 const uint32_t axisIdx) const
+ {
+     if (nullptr == tensor) {
+         return false;
+     }
+
+     /* The checks below dereference the dimension array. */
+     if (nullptr == tensor->dims) {
+         printf_err("Tensor has no dimension information\n");
+         return false;
+     }
+
+     if (static_cast<int>(axisIdx) >= tensor->dims->size) {
+         printf_err("Invalid axis index: %u; Max: %d\n",
+             axisIdx, tensor->dims->size);
+         return false;
+     }
+
+     if (static_cast<int>(this->_m_totalLen) !=
+             tensor->dims->data[axisIdx]) {
+         /* Report the axis as well as the actual and expected lengths. */
+         printf_err("Unexpected tensor dimension for axis %u: got %d, expected %d\n",
+             axisIdx, tensor->dims->data[axisIdx],
+             static_cast<int>(this->_m_totalLen));
+         return false;
+     }
+
+     return true;
+ }
+
+ /**
+  * @brief Returns the size in bytes of one tensor element.
+  *
+  * @return 1 for (u)int8, 2 for int16, 4 for int32/float32, and 0 (after
+  *         logging an error) for any other type.
+  */
+ uint32_t Postprocess::_GetTensorElementSize(TfLiteTensor* tensor)
+ {
+     switch (tensor->type) {
+         /* Single-byte types. */
+         case kTfLiteUInt8:
+         case kTfLiteInt8:
+             return 1;
+
+         case kTfLiteInt16:
+             return 2;
+
+         /* Four-byte types. */
+         case kTfLiteInt32:
+         case kTfLiteFloat32:
+             return 4;
+
+         default:
+             break;
+     }
+
+     printf_err("Unsupported tensor type %s\n",
+         TfLiteTypeGetName(tensor->type));
+     return 0;
+ }
+
+ /**
+  * @brief Overwrites the left and/or right context rows with the blank token.
+  *
+  * The left context is erased on every call except the first of a sequence.
+  * The right context is erased on every call except the last. The iteration
+  * counter is reset on the last call and incremented otherwise.
+  *
+  * @param[in,out] ptrData         Start of the tensor data, addressed as bytes.
+  * @param[in]     strideSzBytes   Size of one row in bytes.
+  * @param[in]     lastIteration   True for the final call of a sequence.
+  * @return Always true.
+  */
+ bool Postprocess::_EraseSectionsRowWise(
+         uint8_t* ptrData,
+         const uint32_t strideSzBytes,
+         const bool lastIteration)
+ {
+     /* The rows to blank are contiguous in memory (row-major), so each
+      * context section is a single span of this many bytes. */
+     const uint32_t sectionBytes = strideSzBytes * this->_m_contextLen;
+
+     /* Zeroes a context section, then marks the blank token in each row.
+      * NOTE(review): the blank token index is applied as a byte offset;
+      * for element sizes above one byte this looks like it should be
+      * scaled by the element size - confirm against callers. */
+     auto setSectionToBlank = [this, strideSzBytes, sectionBytes](uint8_t* section) {
+         std::memset(section, 0, sectionBytes);
+         for (size_t row = 0; row < this->_m_contextLen; row++) {
+             section[row*strideSzBytes + this->_m_blankTokenIdx] = 1;
+         }
+     };
+
+     /* Left context: skipped on the first iteration only. */
+     const bool eraseLeft = this->_m_countIterations > 0;
+     if (eraseLeft) {
+         setSectionToBlank(ptrData);
+     }
+
+     if (lastIteration) {
+         /* Right context is kept; start counting afresh. */
+         this->_m_countIterations = 0;
+     } else {
+         /* Right context starts after the left context and inner section. */
+         uint8_t* rightSection =
+             ptrData + (strideSzBytes * (this->_m_contextLen + this->_m_innerLen));
+         setSectionToBlank(rightSection);
+         ++this->_m_countIterations;
+     }
+
+     return true;
+ }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
new file mode 100644
index 0000000..613ddb0
--- /dev/null
+++ b/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPreprocess.hpp"
+
+#include "PlatformMath.hpp"
+#include "TensorFlowLiteMicro.hpp"
+
+#include <algorithm>
+#include <math.h>
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+  * @brief Constructs the pre-processing helper and its working buffers.
+  *
+  * The MFCC, delta1 and delta2 buffers are each created with dimensions
+  * (numMfccFeatures, numMfccVectors). The MFCC engine is initialised only
+  * when both the feature count and the window length are non-zero.
+  */
+ Preprocess::Preprocess(
+         const uint32_t numMfccFeatures,
+         const uint32_t windowLen,
+         const uint32_t windowStride,
+         const uint32_t numMfccVectors)
+     : _m_mfcc(numMfccFeatures, windowLen),
+       _m_mfccBuf(numMfccFeatures, numMfccVectors),
+       _m_delta1Buf(numMfccFeatures, numMfccVectors),
+       _m_delta2Buf(numMfccFeatures, numMfccVectors),
+       _m_windowLen(windowLen),
+       _m_windowStride(windowStride),
+       _m_numMfccFeats(numMfccFeatures),
+       _m_numFeatVectors(numMfccVectors),
+       _m_window()
+ {
+     const bool mfccConfigured = (numMfccFeatures > 0) && (windowLen > 0);
+     if (mfccConfigured) {
+         this->_m_mfcc.Init();
+     }
+ }
+
+ /**
+  * @brief Computes MFCC, first and second order delta features for the
+  *        given audio, normalises them and quantises them into the tensor.
+  *
+  * @param[in]  audioData      Pointer to the audio samples.
+  * @param[in]  audioDataLen   Number of audio samples.
+  * @param[out] tensor         Tensor receiving the quantised features.
+  * @return true if quantisation succeeded; false if the tensor is null, the
+  *         quantisation scale is zero, or the tensor type is unsupported.
+  */
+ bool Preprocess::Invoke(
+         const int16_t* audioData,
+         const uint32_t audioDataLen,
+         TfLiteTensor* tensor)
+ {
+     if (nullptr == tensor) {
+         printf_err("Invalid (null) tensor\n");
+         return false;
+     }
+
+     this->_m_window = SlidingWindow<const int16_t>(
+                         audioData, audioDataLen,
+                         this->_m_windowLen, this->_m_windowStride);
+
+     uint32_t mfccBufIdx = 0;
+
+     std::fill(_m_mfccBuf.begin(), _m_mfccBuf.end(), 0.f);
+     std::fill(_m_delta1Buf.begin(), _m_delta1Buf.end(), 0.f);
+     std::fill(_m_delta2Buf.begin(), _m_delta2Buf.end(), 0.f);
+
+     const size_t numFeatures = this->_m_mfccBuf.size(0);
+
+     /* While we can slide over the window, and never past the number of
+      * feature vectors the buffers were sized for. */
+     while (this->_m_window.HasNext() && mfccBufIdx < this->_m_numFeatVectors) {
+         const int16_t* mfccWindow = this->_m_window.Next();
+         auto mfccAudioData = std::vector<int16_t>(
+                                 mfccWindow,
+                                 mfccWindow + this->_m_windowLen);
+         auto mfcc = this->_m_mfcc.MfccCompute(mfccAudioData);
+         for (size_t i = 0; i < numFeatures; ++i) {
+             this->_m_mfccBuf(i, mfccBufIdx) = mfcc[i];
+         }
+         ++mfccBufIdx;
+     }
+
+     if (this->_m_window.HasNext()) {
+         printf_err("Audio data exceeds MFCC buffer capacity; extra windows ignored\n");
+     }
+
+     /* Pad MFCC if needed by adding MFCC for zeros. */
+     if (mfccBufIdx < this->_m_numFeatVectors) {
+         std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->_m_windowLen, 0);
+         std::vector<float> mfccZeros = this->_m_mfcc.MfccCompute(zerosWindow);
+
+         /* Written element-wise through the same (feature, vector) accessor
+          * as above, so padding does not depend on the buffer's memory
+          * layout. */
+         for (; mfccBufIdx < this->_m_numFeatVectors; ++mfccBufIdx) {
+             for (size_t i = 0; i < numFeatures; ++i) {
+                 this->_m_mfccBuf(i, mfccBufIdx) = mfccZeros[i];
+             }
+         }
+     }
+
+     /* Compute first and second order deltas from MFCCs. */
+     this->_ComputeDeltas(this->_m_mfccBuf,
+                          this->_m_delta1Buf,
+                          this->_m_delta2Buf);
+
+     /* Normalise. */
+     this->_Normalise();
+
+     /* Quantise. */
+     QuantParams quantParams = GetTensorQuantParams(tensor);
+
+     if (0 == quantParams.scale) {
+         printf_err("Quantisation scale can't be 0\n");
+         return false;
+     }
+
+     switch(tensor->type) {
+         case kTfLiteUInt8:
+             return this->_Quantise<uint8_t>(
+                     tflite::GetTensorData<uint8_t>(tensor), tensor->bytes,
+                     quantParams.scale, quantParams.offset);
+         case kTfLiteInt8:
+             return this->_Quantise<int8_t>(
+                     tflite::GetTensorData<int8_t>(tensor), tensor->bytes,
+                     quantParams.scale, quantParams.offset);
+         default:
+             printf_err("Unsupported tensor type %s\n",
+                 TfLiteTypeGetName(tensor->type));
+     }
+
+     return false;
+ }
+
+ /**
+  * @brief Computes first and second order deltas of the MFCC features.
+  *
+  * For each feature, the values along the second dimension of mfcc are
+  * convolved with two fixed 9-tap kernels. Only positions where the whole
+  * kernel fits are written; the rest of delta1/delta2 is left untouched.
+  *
+  * @param[in]  mfcc     MFCC features, indexed as (feature, vector).
+  * @param[out] delta1   Receives the first order deltas.
+  * @param[out] delta2   Receives the second order deltas.
+  * @return false if mfcc or delta1 is empty or the delta buffers differ in
+  *         their first dimension; true otherwise.
+  */
+ bool Preprocess::_ComputeDeltas(Array2d<float>& mfcc,
+                                 Array2d<float>& delta1,
+                                 Array2d<float>& delta2)
+ {
+     const std::vector <float> delta1Coeffs =
+         {6.66666667e-02,  5.00000000e-02,  3.33333333e-02,
+          1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
+         -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
+
+     const std::vector <float> delta2Coeffs =
+         {0.06060606,      0.01515152,     -0.01731602,
+         -0.03679654,     -0.04329004,     -0.03679654,
+         -0.01731602,      0.01515152,      0.06060606};
+
+     if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
+         mfcc.size(0) == 0 || mfcc.size(1) == 0) {
+         return false;
+     }
+
+     /* Get the middle index; coeff vec len should always be odd. */
+     const size_t coeffLen = delta1Coeffs.size();
+     const size_t fMidIdx = (coeffLen - 1)/2;
+     const size_t numFeatures = mfcc.size(0);
+     const size_t numFeatVectors = mfcc.size(1);
+
+     /* Iterate through features in MFCC vector. */
+     for (size_t i = 0; i < numFeatures; ++i) {
+         /* For each feature, iterate through time (t) samples representing feature evolution and
+          * calculate d/dt and d^2/dt^2, using 1d convolution with differential kernels.
+          * Convolution padding = valid, result size is `time length - kernel length + 1`.
+          * The result is padded with 0 from both sides to match the size of initial time samples data.
+          *
+          * For the small filter, conv1d implementation as a simple loop is efficient enough.
+          * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
+          */
+
+         /* The bound is written as `j + fMidIdx < numFeatVectors` so the
+          * unsigned arithmetic cannot wrap around when there are fewer
+          * vectors than half the kernel length. */
+         for (size_t j = fMidIdx; j + fMidIdx < numFeatVectors; ++j) {
+             float d1 = 0;
+             float d2 = 0;
+             const size_t mfccStIdx = j - fMidIdx;
+
+             /* k walks the samples forwards while m walks the kernel backwards. */
+             for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
+
+                 d1 +=  mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
+                 d2 +=  mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
+             }
+
+             delta1(i,j) = d1;
+             delta2(i,j) = d2;
+         }
+     }
+
+     return true;
+ }
+
+ /* Returns MathUtils::MeanF32 evaluated over all elements of vec. */
+ float Preprocess::_GetMean(Array2d<float>& vec)
+ {
+ return math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
+ }
+
+ /* Returns MathUtils::StdDevF32 evaluated over all elements of vec, given
+  * the supplied mean. */
+ float Preprocess::_GetStdDev(Array2d<float>& vec, const float mean)
+ {
+ return math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
+ }
+
+ /**
+  * @brief Normalises a buffer in place using its mean and standard deviation.
+  *
+  * Each element becomes value/stddev - mean/stddev. When the standard
+  * deviation is zero, the whole buffer is set to zero instead.
+  */
+ void Preprocess::_NormaliseVec(Array2d<float>& vec)
+ {
+     auto mean = Preprocess::_GetMean(vec);
+     auto stddev = Preprocess::_GetStdDev(vec, mean);
+
+     debug("Mean: %f, Stddev: %f\n", mean, stddev);
+
+     /* Degenerate case: no spread, so every element maps to zero. */
+     if (stddev == 0) {
+         std::fill(vec.begin(), vec.end(), 0);
+         return;
+     }
+
+     /* Precompute the reciprocal and the scaled mean once. */
+     const float scale = 1.f/stddev;
+     const float shift = mean/stddev;
+
+     for (auto elemIt = vec.begin(); elemIt != vec.end(); ++elemIt) {
+         *elemIt = *elemIt * scale - shift;
+     }
+ }
+
+ /* Normalises the MFCC, delta1 and delta2 buffers, each independently. */
+ void Preprocess::_Normalise()
+ {
+ Preprocess::_NormaliseVec(this->_m_mfccBuf);
+ Preprocess::_NormaliseVec(this->_m_delta1Buf);
+ Preprocess::_NormaliseVec(this->_m_delta2Buf);
+ }
+
+ /**
+  * @brief Quantises one value: round(elem/quantScale + quantOffset), then
+  *        limits the result to the range [minVal, maxVal].
+  */
+ float Preprocess::_GetQuantElem(
+         const float elem,
+         const float quantScale,
+         const int quantOffset,
+         const float minVal,
+         const float maxVal)
+ {
+     const float rounded = std::round((elem/quantScale) + quantOffset);
+
+     /* Apply the lower bound first, then the upper bound. */
+     const float atLeastMin = (rounded < minVal) ? minVal : rounded;
+     return (maxVal < atLeastMin) ? maxVal : atLeastMin;
+ }
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake
new file mode 100644
index 0000000..f15bc73
--- /dev/null
+++ b/source/use_case/kws_asr/usecase.cmake
@@ -0,0 +1,259 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+
+# If the path to a directory or source file has been defined,
+# get the type here (FILEPATH or PATH):
+if (DEFINED ${use_case}_FILE_PATH)
+    get_path_type(${${use_case}_FILE_PATH} PATH_TYPE)
+
+    # Abort configuration if the path is neither a dir nor a file path.
+    # The expansions are quoted so an empty PATH_TYPE reaches this check
+    # instead of producing a malformed if() expression.
+    if (NOT "${PATH_TYPE}" STREQUAL "PATH" AND NOT "${PATH_TYPE}" STREQUAL "FILEPATH")
+        message(FATAL_ERROR "Invalid ${use_case}_FILE_PATH. It should be a dir or file path.")
+    endif()
+else()
+    # Default is a directory path
+    set(PATH_TYPE PATH)
+endif()
+
+message(STATUS "${use_case}_FILE_PATH is of type: ${PATH_TYPE}")
+
+# Audio input options. USER_OPTION is a project helper defined elsewhere;
+# each call below passes a name, a description, a default value and a type.
+# The sample location uses the PATH_TYPE determined above.
+USER_OPTION(${use_case}_FILE_PATH "Directory with WAV files, or path to a single WAV file, to use in the evaluation application."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
+ ${PATH_TYPE})
+
+USER_OPTION(${use_case}_AUDIO_RATE "Specify the target sampling rate. Default is 16000."
+ 16000
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MONO "Specify if the audio needs to be converted to mono. Default is ON."
+ ON
+ BOOL)
+
+USER_OPTION(${use_case}_AUDIO_OFFSET "Specify the offset to start reading after this time (in seconds). Default is 0."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_DURATION "Specify the audio duration to load (in seconds). If set to 0 the entire audio will be processed."
+ 0
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_RES_TYPE "Specify re-sampling algorithm to use. By default is 'kaiser_best'."
+ kaiser_best
+ STRING)
+
+USER_OPTION(${use_case}_AUDIO_MIN_SAMPLES "Specify the minimum number of samples to use. By default is 16000, if the audio is shorter will be automatically padded."
+ 16000
+ STRING)
+
+# Generate audio .cc files:
+# The audio options above are forwarded positionally, after the source path
+# and the generated source/include directories.
+generate_audio_code(${${use_case}_FILE_PATH} ${SRC_GEN_DIR} ${INC_GEN_DIR}
+ ${${use_case}_AUDIO_RATE}
+ ${${use_case}_AUDIO_MONO}
+ ${${use_case}_AUDIO_OFFSET}
+ ${${use_case}_AUDIO_DURATION}
+ ${${use_case}_AUDIO_RES_TYPE}
+ ${${use_case}_AUDIO_MIN_SAMPLES})
+
+# Generate kws labels file:
+# The labels text file is configurable; the generated code is named
+# Labels_dscnn and is requested in the namespace arm::app::kws.
+USER_OPTION(${use_case}_LABELS_TXT_FILE_KWS "Labels' txt file for the chosen model."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/ds_cnn_labels.txt
+ FILEPATH)
+
+set(${use_case}_LABELS_CPP_FILE_KWS Labels_dscnn)
+generate_labels_code(
+ INPUT "${${use_case}_LABELS_TXT_FILE_KWS}"
+ DESTINATION_SRC ${SRC_GEN_DIR}
+ DESTINATION_HDR ${INC_GEN_DIR}
+ OUTPUT_FILENAME "${${use_case}_LABELS_CPP_FILE_KWS}"
+ NAMESPACE "arm" "app" "kws"
+)
+
+# Generate asr labels file:
+# Same as above for the ASR model: the output is named Labels_wav2letter and
+# is requested in the namespace arm::app::asr.
+USER_OPTION(${use_case}_LABELS_TXT_FILE_ASR "Labels' txt file for the chosen model."
+ ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/labels_wav2letter.txt
+ FILEPATH)
+
+set(${use_case}_LABELS_CPP_FILE_ASR Labels_wav2letter)
+generate_labels_code(
+ INPUT "${${use_case}_LABELS_TXT_FILE_ASR}"
+ DESTINATION_SRC ${SRC_GEN_DIR}
+ DESTINATION_HDR ${INC_GEN_DIR}
+ OUTPUT_FILENAME "${${use_case}_LABELS_CPP_FILE_ASR}"
+ NAMESPACE "arm" "app" "asr"
+)
+
+# Activation buffer size option (default 0x00200000).
+USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
+ 0x00200000
+ STRING)
+
+# Score thresholds; the values are embedded in the generated model code as
+# g_ScoreThreshold (see the EXTRA_MODEL_CODE_* lists further down).
+USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD_KWS "Specify the score threshold [0.0, 1.0) that must be applied to the KWS results for a label to be deemed valid."
+ 0.9
+ STRING)
+
+USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD_ASR "Specify the score threshold [0.0, 1.0) that must be applied to the ASR results for a label to be deemed valid."
+ 0.5
+ STRING)
+
+# If there is no tflite file pointed to for KWS, download the default model
+# (and, for native builds, its test vectors) from the model zoo.
+if (NOT DEFINED ${use_case}_MODEL_TFLITE_PATH_KWS)
+
+    set(SUB_USECASE_KWS         "kws")
+    set(MODEL_FILENAME_KWS      ds_cnn_clustered_int8.tflite)
+    set(MODEL_RESOURCES_DIR_KWS ${DOWNLOAD_DEP_DIR}/${use_case})
+    file(MAKE_DIRECTORY         ${MODEL_RESOURCES_DIR_KWS})
+    set(DEFAULT_MODEL_PATH_KWS  ${MODEL_RESOURCES_DIR_KWS}/${MODEL_FILENAME_KWS})
+
+    # Download the default model
+    set(ZOO_COMMON_SUBPATH_KWS  "models/keyword_spotting/ds_cnn_large/tflite_clustered_int8")
+    set(ZOO_MODEL_SUBPATH_KWS   "${ZOO_COMMON_SUBPATH_KWS}/${MODEL_FILENAME_KWS}")
+
+    download_file_from_modelzoo(${ZOO_MODEL_SUBPATH_KWS}    ${DEFAULT_MODEL_PATH_KWS})
+
+    if (ETHOS_U55_ENABLED)
+        message(STATUS
+            "Ethos-U55 is enabled, but the model downloaded is not optimized by vela. "
+            "To use Ethos-U55 acceleration, optimise the downloaded model and pass it "
+            "as ${use_case}_MODEL_TFLITE_PATH_KWS to the CMake configuration.")
+    endif()
+
+    # If the target platform is native (quoted so an unset TARGET_PLATFORM
+    # evaluates to false rather than breaking the if() expression).
+    if ("${TARGET_PLATFORM}" STREQUAL "native")
+
+        # Download test vectors
+        set(ZOO_TEST_IFM_SUBPATH_KWS    "${ZOO_COMMON_SUBPATH_KWS}/testing_input/input_2/0.npy")
+        set(ZOO_TEST_OFM_SUBPATH_KWS    "${ZOO_COMMON_SUBPATH_KWS}/testing_output/Identity/0.npy")
+
+        file(MAKE_DIRECTORY         ${MODEL_RESOURCES_DIR_KWS}/${SUB_USECASE_KWS})
+        set(${use_case}_TEST_IFM    ${MODEL_RESOURCES_DIR_KWS}/${SUB_USECASE_KWS}/ifm0.npy CACHE FILEPATH
+                                    "Input test vector for ${use_case}-${SUB_USECASE_KWS}")
+        set(${use_case}_TEST_OFM    ${MODEL_RESOURCES_DIR_KWS}/${SUB_USECASE_KWS}/ofm0.npy CACHE FILEPATH
+                                    "Output test vector for ${use_case}-${SUB_USECASE_KWS}")
+
+        download_file_from_modelzoo(${ZOO_TEST_IFM_SUBPATH_KWS} ${${use_case}_TEST_IFM})
+        download_file_from_modelzoo(${ZOO_TEST_OFM_SUBPATH_KWS} ${${use_case}_TEST_OFM})
+        set(TEST_SRC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/src)
+        set(TEST_INC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/include)
+        file(MAKE_DIRECTORY ${TEST_SRC_GEN_DIR} ${TEST_INC_GEN_DIR})
+
+        # Generate test data files from the downloaded vectors
+        generate_test_data_code(
+                INPUT_DIR "${DOWNLOAD_DEP_DIR}/${use_case}/${SUB_USECASE_KWS}"
+                DESTINATION_SRC ${TEST_SRC_GEN_DIR}
+                DESTINATION_HDR ${TEST_INC_GEN_DIR}
+                USECASE ${SUB_USECASE_KWS}
+                NAMESPACE   "arm" "app" ${SUB_USECASE_KWS})
+    endif()
+
+else()
+    set(DEFAULT_MODEL_PATH_KWS  "N/A")
+endif()
+
+# Extra expressions for the KWS model source; passed as EXPRESSIONS to
+# generate_tflite_code() below. The score threshold comes from the
+# ${use_case}_MODEL_SCORE_THRESHOLD_KWS option.
+set(EXTRA_MODEL_CODE_KWS
+ "/* Model parameters for ${use_case} */"
+ "extern const uint32_t g_NumMfcc = 10"
+ "extern const uint32_t g_NumAudioWins = 49"
+ "extern const int g_FrameLength = 640"
+ "extern const int g_FrameStride = 320"
+ "extern const float g_ScoreThreshold = ${${use_case}_MODEL_SCORE_THRESHOLD_KWS}"
+ )
+
+# If there is no tflite file pointed to for ASR, download the default model
+# (and, for native builds, its test vectors) from the model zoo.
+if (NOT DEFINED ${use_case}_MODEL_TFLITE_PATH_ASR)
+
+    set(SUB_USECASE_ASR         "asr")
+    set(MODEL_FILENAME_ASR      wav2letter_int8.tflite)
+    set(MODEL_RESOURCES_DIR_ASR ${DOWNLOAD_DEP_DIR}/${use_case})
+    file(MAKE_DIRECTORY         ${MODEL_RESOURCES_DIR_ASR})
+    set(DEFAULT_MODEL_PATH_ASR  ${MODEL_RESOURCES_DIR_ASR}/${MODEL_FILENAME_ASR})
+
+    # Download the default model
+    set(ZOO_COMMON_SUBPATH_ASR  "models/speech_recognition/wav2letter/tflite_int8")
+    set(ZOO_MODEL_SUBPATH_ASR   "${ZOO_COMMON_SUBPATH_ASR}/${MODEL_FILENAME_ASR}")
+
+    download_file_from_modelzoo(${ZOO_MODEL_SUBPATH_ASR}    ${DEFAULT_MODEL_PATH_ASR})
+
+    if (ETHOS_U55_ENABLED)
+        message(STATUS
+            "Ethos-U55 is enabled, but the model downloaded is not optimized by vela. "
+            "To use Ethos-U55 acceleration, optimise the downloaded model and pass it "
+            "as ${use_case}_MODEL_TFLITE_PATH_ASR to the CMake configuration.")
+    endif()
+
+    # If the target platform is native (quoted so an unset TARGET_PLATFORM
+    # evaluates to false rather than breaking the if() expression).
+    if ("${TARGET_PLATFORM}" STREQUAL "native")
+
+        # Download test vectors
+        set(ZOO_TEST_IFM_SUBPATH_ASR    "${ZOO_COMMON_SUBPATH_ASR}/testing_input/input_2_int8/0.npy")
+        set(ZOO_TEST_OFM_SUBPATH_ASR    "${ZOO_COMMON_SUBPATH_ASR}/testing_output/Identity_int8/0.npy")
+
+        file(MAKE_DIRECTORY             ${MODEL_RESOURCES_DIR_ASR}/${SUB_USECASE_ASR})
+        set(${use_case}_TEST_IFM_ASR    ${MODEL_RESOURCES_DIR_ASR}/${SUB_USECASE_ASR}/ifm0.npy CACHE FILEPATH
+                                        "Input test vector for ${use_case}-${SUB_USECASE_ASR}")
+        set(${use_case}_TEST_OFM_ASR    ${MODEL_RESOURCES_DIR_ASR}/${SUB_USECASE_ASR}/ofm0.npy CACHE FILEPATH
+                                        "Output test vector for ${use_case}-${SUB_USECASE_ASR}")
+
+        # Fetch the ASR vectors from the ASR zoo sub-paths set just above.
+        download_file_from_modelzoo(${ZOO_TEST_IFM_SUBPATH_ASR} ${${use_case}_TEST_IFM_ASR})
+        download_file_from_modelzoo(${ZOO_TEST_OFM_SUBPATH_ASR} ${${use_case}_TEST_OFM_ASR})
+
+        set(TEST_SRC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/src)
+        set(TEST_INC_GEN_DIR ${CMAKE_BINARY_DIR}/generated/${use_case}/tests/include)
+        file(MAKE_DIRECTORY ${TEST_SRC_GEN_DIR} ${TEST_INC_GEN_DIR})
+
+        # Generate test data files to be included in x86 tests
+        generate_test_data_code(
+                INPUT_DIR "${DOWNLOAD_DEP_DIR}/${use_case}/${SUB_USECASE_ASR}"
+                DESTINATION_SRC ${TEST_SRC_GEN_DIR}
+                DESTINATION_HDR ${TEST_INC_GEN_DIR}
+                USECASE ${SUB_USECASE_ASR}
+                NAMESPACE   "arm" "app" ${SUB_USECASE_ASR})
+    endif()
+
+else()
+    set(DEFAULT_MODEL_PATH_ASR  "N/A")
+endif()
+
+# Extra expressions for the ASR model source; passed as EXPRESSIONS to
+# generate_tflite_code() below. The score threshold comes from the
+# ${use_case}_MODEL_SCORE_THRESHOLD_ASR option.
+set(EXTRA_MODEL_CODE_ASR
+ "/* Model parameters for ${use_case} */"
+ "extern const int g_FrameLength = 512"
+ "extern const int g_FrameStride = 160"
+ "extern const int g_ctxLen = 98"
+ "extern const float g_ScoreThreshold = ${${use_case}_MODEL_SCORE_THRESHOLD_ASR}"
+ )
+
+# Model file options. The defaults are the DEFAULT_MODEL_PATH_* values set
+# above: the downloaded model's path, or "N/A" when the corresponding
+# variable was already defined.
+USER_OPTION(${use_case}_MODEL_TFLITE_PATH_KWS "NN models file to be used for KWS in the evaluation application. Model files must be in tflite format."
+ ${DEFAULT_MODEL_PATH_KWS}
+ FILEPATH
+ )
+
+USER_OPTION(${use_case}_MODEL_TFLITE_PATH_ASR "NN models file to be used for ASR in the evaluation application. Model files must be in tflite format."
+ ${DEFAULT_MODEL_PATH_ASR}
+ FILEPATH
+ )
+
+# Generate model file for KWS
+# The model path, the extra expressions list and the namespace arm::app::kws
+# are passed to generate_tflite_code (a project helper defined elsewhere).
+generate_tflite_code(
+ MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH_KWS}
+ DESTINATION ${SRC_GEN_DIR}
+ EXPRESSIONS ${EXTRA_MODEL_CODE_KWS}
+ NAMESPACE "arm" "app" "kws"
+)
+
+# and for ASR
+# Same call with the ASR model path, expressions and namespace arm::app::asr.
+generate_tflite_code(
+ MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH_ASR}
+ DESTINATION ${SRC_GEN_DIR}
+ EXPRESSIONS ${EXTRA_MODEL_CODE_ASR}
+ NAMESPACE "arm" "app" "asr"
+)