summaryrefslogtreecommitdiff
path: root/source/use_case
diff options
context:
space:
mode:
author Kshitij Sisodia <kshitij.sisodia@arm.com> 2022-05-06 09:13:03 +0100
committer Kshitij Sisodia <kshitij.sisodia@arm.com> 2022-05-06 17:11:41 +0100
commit aa4bcb14d0cbee910331545dd2fc086b58c37170 (patch)
tree e67a43a43f61c6f8b6aad19018b0827baf7e31a6 /source/use_case
parent fcca863bafd5f33522bc14c23dde4540e264ec94 (diff)
download ml-embedded-evaluation-kit-aa4bcb14d0cbee910331545dd2fc086b58c37170.tar.gz
MLECO-3183: Refactoring application sources
Platform-agnostic application sources are moved into the application API module, each with its own independent CMake project. Changes for MLECO-3080 are also included - they create CMake projects for the individual APIs (again, platform agnostic) that depend on the common logic. The KWS_API "joint" API has been removed and the use case now relies on the individual KWS and ASR API libraries. Change-Id: I1f7748dc767abb3904634a04e0991b74ac7b756d Signed-off-by: Kshitij Sisodia <kshitij.sisodia@arm.com>
Diffstat (limited to 'source/use_case')
-rw-r--r--source/use_case/ad/include/AdMelSpectrogram.hpp97
-rw-r--r--source/use_case/ad/include/AdModel.hpp59
-rw-r--r--source/use_case/ad/include/AdProcessing.hpp230
-rw-r--r--source/use_case/ad/include/MelSpectrogram.hpp234
-rw-r--r--source/use_case/ad/src/AdMelSpectrogram.cc93
-rw-r--r--source/use_case/ad/src/AdModel.cc54
-rw-r--r--source/use_case/ad/src/AdProcessing.cc208
-rw-r--r--source/use_case/ad/src/MainLoop.cc25
-rw-r--r--source/use_case/ad/src/MelSpectrogram.cc316
-rw-r--r--source/use_case/ad/usecase.cmake3
-rw-r--r--source/use_case/asr/include/AsrClassifier.hpp63
-rw-r--r--source/use_case/asr/include/AsrResult.hpp63
-rw-r--r--source/use_case/asr/include/OutputDecode.hpp40
-rw-r--r--source/use_case/asr/include/Wav2LetterMfcc.hpp109
-rw-r--r--source/use_case/asr/include/Wav2LetterModel.hpp65
-rw-r--r--source/use_case/asr/include/Wav2LetterPostprocess.hpp108
-rw-r--r--source/use_case/asr/include/Wav2LetterPreprocess.hpp182
-rw-r--r--source/use_case/asr/src/AsrClassifier.cc144
-rw-r--r--source/use_case/asr/src/MainLoop.cc34
-rw-r--r--source/use_case/asr/src/OutputDecode.cc47
-rw-r--r--source/use_case/asr/src/Wav2LetterMfcc.cc141
-rw-r--r--source/use_case/asr/src/Wav2LetterModel.cc57
-rw-r--r--source/use_case/asr/src/Wav2LetterPostprocess.cc214
-rw-r--r--source/use_case/asr/src/Wav2LetterPreprocess.cc208
-rw-r--r--source/use_case/asr/usecase.cmake4
-rw-r--r--source/use_case/img_class/include/ImgClassProcessing.hpp92
-rw-r--r--source/use_case/img_class/include/MobileNetModel.hpp55
-rw-r--r--source/use_case/img_class/src/ImgClassProcessing.cc65
-rw-r--r--source/use_case/img_class/src/MainLoop.cc24
-rw-r--r--source/use_case/img_class/src/MobileNetModel.cc56
-rw-r--r--source/use_case/img_class/usecase.cmake2
-rw-r--r--source/use_case/inference_runner/include/TestModel.hpp47
-rw-r--r--source/use_case/inference_runner/src/MainLoop.cc45
-rw-r--r--source/use_case/inference_runner/src/TestModel.cc55
-rw-r--r--source/use_case/inference_runner/usecase.cmake2
-rw-r--r--source/use_case/kws/include/KwsProcessing.hpp138
-rw-r--r--source/use_case/kws/include/KwsResult.hpp63
-rw-r--r--source/use_case/kws/include/MicroNetKwsMfcc.hpp50
-rw-r--r--source/use_case/kws/include/MicroNetKwsModel.hpp59
-rw-r--r--source/use_case/kws/src/KwsProcessing.cc212
-rw-r--r--source/use_case/kws/src/MainLoop.cc34
-rw-r--r--source/use_case/kws/src/MicroNetKwsModel.cc56
-rw-r--r--source/use_case/kws/usecase.cmake3
-rw-r--r--source/use_case/kws_asr/include/AsrClassifier.hpp66
-rw-r--r--source/use_case/kws_asr/include/AsrResult.hpp63
-rw-r--r--source/use_case/kws_asr/include/KwsProcessing.hpp138
-rw-r--r--source/use_case/kws_asr/include/KwsResult.hpp63
-rw-r--r--source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp51
-rw-r--r--source/use_case/kws_asr/include/MicroNetKwsModel.hpp66
-rw-r--r--source/use_case/kws_asr/include/OutputDecode.hpp40
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterMfcc.hpp113
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterModel.hpp71
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp108
-rw-r--r--source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp182
-rw-r--r--source/use_case/kws_asr/src/AsrClassifier.cc136
-rw-r--r--source/use_case/kws_asr/src/KwsProcessing.cc212
-rw-r--r--source/use_case/kws_asr/src/MainLoop.cc46
-rw-r--r--source/use_case/kws_asr/src/MicroNetKwsModel.cc63
-rw-r--r--source/use_case/kws_asr/src/OutputDecode.cc47
-rw-r--r--source/use_case/kws_asr/src/UseCaseHandler.cc3
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterMfcc.cc141
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterModel.cc61
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterPostprocess.cc214
-rw-r--r--source/use_case/kws_asr/src/Wav2LetterPreprocess.cc208
-rw-r--r--source/use_case/kws_asr/usecase.cmake4
-rw-r--r--source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp341
-rw-r--r--source/use_case/noise_reduction/include/RNNoiseModel.hpp82
-rw-r--r--source/use_case/noise_reduction/include/RNNoiseProcessing.hpp113
-rw-r--r--source/use_case/noise_reduction/src/MainLoop.cc28
-rw-r--r--source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc892
-rw-r--r--source/use_case/noise_reduction/src/RNNoiseModel.cc110
-rw-r--r--source/use_case/noise_reduction/src/RNNoiseProcessing.cc100
-rw-r--r--source/use_case/noise_reduction/usecase.cmake2
-rw-r--r--source/use_case/object_detection/include/DetectionResult.hpp61
-rw-r--r--source/use_case/object_detection/include/DetectorPostProcessing.hpp126
-rw-r--r--source/use_case/object_detection/include/DetectorPreProcessing.hpp60
-rw-r--r--source/use_case/object_detection/include/YoloFastestModel.hpp60
-rw-r--r--source/use_case/object_detection/src/DetectorPostProcessing.cc240
-rw-r--r--source/use_case/object_detection/src/DetectorPreProcessing.cc52
-rw-r--r--source/use_case/object_detection/src/MainLoop.cc25
-rw-r--r--source/use_case/object_detection/src/YoloFastestModel.cc59
-rw-r--r--source/use_case/object_detection/usecase.cmake2
-rw-r--r--source/use_case/vww/include/VisualWakeWordModel.hpp54
-rw-r--r--source/use_case/vww/include/VisualWakeWordProcessing.hpp93
-rw-r--r--source/use_case/vww/src/MainLoop.cc27
-rw-r--r--source/use_case/vww/src/VisualWakeWordModel.cc56
-rw-r--r--source/use_case/vww/src/VisualWakeWordProcessing.cc80
-rw-r--r--source/use_case/vww/usecase.cmake4
88 files changed, 285 insertions, 8334 deletions
diff --git a/source/use_case/ad/include/AdMelSpectrogram.hpp b/source/use_case/ad/include/AdMelSpectrogram.hpp
deleted file mode 100644
index 05c5bfc..0000000
--- a/source/use_case/ad/include/AdMelSpectrogram.hpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ADMELSPECTROGRAM_HPP
-#define ADMELSPECTROGRAM_HPP
-
-#include "MelSpectrogram.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Class to provide anomaly detection specific Mel Spectrogram calculation requirements */
- class AdMelSpectrogram : public MelSpectrogram {
-
- public:
- static constexpr uint32_t ms_defaultSamplingFreq = 16000;
- static constexpr uint32_t ms_defaultNumFbankBins = 64;
- static constexpr uint32_t ms_defaultMelLoFreq = 0;
- static constexpr uint32_t ms_defaultMelHiFreq = 8000;
- static constexpr bool ms_defaultUseHtkMethod = false;
-
- explicit AdMelSpectrogram(const size_t frameLen)
- : MelSpectrogram(MelSpecParams(
- ms_defaultSamplingFreq, ms_defaultNumFbankBins,
- ms_defaultMelLoFreq, ms_defaultMelHiFreq,
- frameLen, ms_defaultUseHtkMethod))
- {}
-
- AdMelSpectrogram() = delete;
- ~AdMelSpectrogram() = default;
-
- protected:
-
- /**
- * @brief Overrides base class implementation of this function.
- * @param[in] fftVec Vector populated with FFT magnitudes
- * @param[in] melFilterBank 2D Vector with filter bank weights
- * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
- * to be used for each bin.
- * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
- * to be used for each bin.
- * @param[out] melEnergies Pre-allocated vector of MEL energies to be
- * populated.
- * @return true if successful, false otherwise
- */
- virtual bool ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies) override;
-
- /**
- * @brief Override for the base class implementation convert mel
- * energies to logarithmic scale. The difference from
- * default behaviour is that the power is converted to dB
- * and subsequently clamped.
- * @param[in,out] melEnergies - 1D vector of Mel energies
- **/
- virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override;
-
- /**
- * @brief Given the low and high Mel values, get the normaliser
- * for weights to be applied when populating the filter
- * bank. Override for the base class implementation.
- * @param[in] leftMel - low Mel frequency value
- * @param[in] rightMel - high Mel frequency value
- * @param[in] useHTKMethod - bool to signal if HTK method is to be
- * used for calculation
- * @return Return float value to be applied
- * when populating the filter bank.
- */
- virtual float GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- const bool useHTKMethod) override;
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ADMELSPECTROGRAM_HPP */
diff --git a/source/use_case/ad/include/AdModel.hpp b/source/use_case/ad/include/AdModel.hpp
deleted file mode 100644
index 2195a7c..0000000
--- a/source/use_case/ad/include/AdModel.hpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AD_MODEL_HPP
-#define AD_MODEL_HPP
-
-#include "Model.hpp"
-
-extern const int g_FrameLength;
-extern const int g_FrameStride;
-extern const float g_ScoreThreshold;
-extern const float g_TrainingMean;
-
-namespace arm {
-namespace app {
-
- class AdModel : public Model {
-
- public:
- /* Indices for the expected model - based on input tensor shape */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
-
- protected:
- /** @brief Gets the reference to op resolver interface class */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted */
- static constexpr int ms_maxOpCnt = 6;
-
- /* A mutable op resolver instance */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* AD_MODEL_HPP */
diff --git a/source/use_case/ad/include/AdProcessing.hpp b/source/use_case/ad/include/AdProcessing.hpp
deleted file mode 100644
index 9abf6f1..0000000
--- a/source/use_case/ad/include/AdProcessing.hpp
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AD_PROCESSING_HPP
-#define AD_PROCESSING_HPP
-
-#include "BaseProcessing.hpp"
-#include "AudioUtils.hpp"
-#include "AdMelSpectrogram.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for anomaly detection use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class AdPreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor for AdPreProcess class objects
- * @param[in] inputTensor input tensor pointer from the tensor arena.
- * @param[in] melSpectrogramFrameLen MEL spectrogram's frame length
- * @param[in] melSpectrogramFrameStride MEL spectrogram's frame stride
- * @param[in] adModelTrainingMean Training mean for the Anomaly detection model being used.
- */
- explicit AdPreProcess(TfLiteTensor* inputTensor,
- uint32_t melSpectrogramFrameLen,
- uint32_t melSpectrogramFrameStride,
- float adModelTrainingMean);
-
- ~AdPreProcess() = default;
-
- /**
- * @brief Function to invoke pre-processing and populate the input vector
- * @param input pointer to input data. For anomaly detection, this is the pointer to
- * the audio data.
- * @param inputSize Size of the data being passed in for pre-processing.
- * @return True if successful, false otherwise.
- */
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- /**
- * @brief Getter function for audio window size computed when constructing
- * the class object.
- * @return Audio window size as 32 bit unsigned integer.
- */
- uint32_t GetAudioWindowSize();
-
- /**
- * @brief Getter function for audio window stride computed when constructing
- * the class object.
- * @return Audio window stride as 32 bit unsigned integer.
- */
- uint32_t GetAudioDataStride();
-
- /**
- * @brief Setter function for current audio index. This is only used for evaluating
- * if previously computed features can be re-used from cache.
- */
- void SetAudioWindowIndex(uint32_t idx);
-
- private:
- bool m_validInstance{false}; /**< Indicates the current object is valid. */
- uint32_t m_melSpectrogramFrameLen{}; /**< MEL spectrogram's window frame length */
- uint32_t m_melSpectrogramFrameStride{}; /**< MEL spectrogram's window frame stride */
- uint8_t m_inputResizeScale{}; /**< Downscaling factor for the MEL energy matrix. */
- uint32_t m_numMelSpecVectorsInAudioStride{}; /**< Number of frames to move across the audio. */
- uint32_t m_audioDataWindowSize{}; /**< Audio window size computed based on other parameters. */
- uint32_t m_audioDataStride{}; /**< Audio window stride computed. */
- uint32_t m_numReusedFeatureVectors{}; /**< Number of MEL vectors that can be re-used */
- uint32_t m_audioWindowIndex{}; /**< Current audio window index (from audio's sliding window) */
-
- audio::SlidingWindow<const int16_t> m_melWindowSlider; /**< Internal MEL spectrogram window slider */
- audio::AdMelSpectrogram m_melSpec; /**< MEL spectrogram computation object */
- std::function<void
- (std::vector<int16_t>&, int, bool, size_t, size_t)> m_featureCalc; /**< Feature calculator object */
- };
-
- class AdPostProcess : public BasePostProcess {
- public:
- /**
- * @brief Constructor for AdPostProcess object.
- * @param[in] outputTensor Output tensor pointer.
- */
- explicit AdPostProcess(TfLiteTensor* outputTensor);
-
- ~AdPostProcess() = default;
-
- /**
- * @brief Function to do the post-processing on the output tensor.
- * @return True if successful, false otherwise.
- */
- bool DoPostProcess() override;
-
- /**
- * @brief Getter function for an element from the de-quantised output vector.
- * @param index Index of the element to be retrieved.
- * @return index represented as a 32 bit floating point number.
- */
- float GetOutputValue(uint32_t index);
-
- private:
- TfLiteTensor* m_outputTensor{}; /**< Output tensor pointer */
- std::vector<float> m_dequantizedOutputVec{}; /**< Internal output vector */
-
- /**
- * @brief De-quantizes and flattens the output tensor into a vector.
- * @tparam T template parameter to indicate data type.
- * @return True if successful, false otherwise.
- */
- template<typename T>
- bool Dequantize()
- {
- TfLiteTensor* tensor = this->m_outputTensor;
- if (tensor == nullptr) {
- printf_err("Invalid output tensor.\n");
- return false;
- }
- T* tensorData = tflite::GetTensorData<T>(tensor);
-
- uint32_t totalOutputSize = 1;
- for (int inputDim = 0; inputDim < tensor->dims->size; inputDim++){
- totalOutputSize *= tensor->dims->data[inputDim];
- }
-
- /* For getting the floating point values, we need quantization parameters */
- QuantParams quantParams = GetTensorQuantParams(tensor);
-
- this->m_dequantizedOutputVec = std::vector<float>(totalOutputSize, 0);
-
- for (size_t i = 0; i < totalOutputSize; ++i) {
- this->m_dequantizedOutputVec[i] = quantParams.scale * (tensorData[i] - quantParams.offset);
- }
-
- return true;
- }
- };
-
- /* Templated instances available: */
- template bool AdPostProcess::Dequantize<int8_t>();
-
- /**
- * @brief Generic feature calculator factory.
- *
- * Returns lambda function to compute features using features cache.
- * Real features math is done by a lambda function provided as a parameter.
- * Features are written to input tensor memory.
- *
- * @tparam T feature vector type.
- * @param inputTensor model input tensor pointer.
- * @param cacheSize number of feature vectors to cache. Defined by the sliding window overlap.
- * @param compute features calculator function.
- * @return lambda function to compute features.
- */
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
- FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute)
- {
- /* Feature cache to be captured by lambda function*/
- static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
-
- return [=](std::vector<int16_t>& audioDataWindow,
- size_t index,
- bool useCache,
- size_t featuresOverlapIndex,
- size_t resizeScale)
- {
- T* tensorData = tflite::GetTensorData<T>(inputTensor);
- std::vector<T> features;
-
- /* Reuse features from cache if cache is ready and sliding windows overlap.
- * Overlap is in the beginning of sliding window with a size of a feature cache. */
- if (useCache && index < featureCache.size()) {
- features = std::move(featureCache[index]);
- } else {
- features = std::move(compute(audioDataWindow));
- }
- auto size = features.size() / resizeScale;
- auto sizeBytes = sizeof(T);
-
- /* Input should be transposed and "resized" by skipping elements. */
- for (size_t outIndex = 0; outIndex < size; outIndex++) {
- std::memcpy(tensorData + (outIndex*size) + index, &features[outIndex*resizeScale], sizeBytes);
- }
-
- /* Start renewing cache as soon iteration goes out of the windows overlap. */
- if (index >= featuresOverlapIndex / resizeScale) {
- featureCache[index - featuresOverlapIndex / resizeScale] = std::move(features);
- }
- };
- }
-
- template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
- FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
-
- template std::function<void(std::vector<int16_t>&, size_t, bool, size_t, size_t)>
- FeatureCalc<float>(TfLiteTensor *inputTensor,
- size_t cacheSize,
- std::function<std::vector<float>(std::vector<int16_t>&)> compute);
-
- std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
- GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
- TfLiteTensor* inputTensor,
- size_t cacheSize,
- float trainingMean);
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* AD_PROCESSING_HPP */
diff --git a/source/use_case/ad/include/MelSpectrogram.hpp b/source/use_case/ad/include/MelSpectrogram.hpp
deleted file mode 100644
index d3ea3f7..0000000
--- a/source/use_case/ad/include/MelSpectrogram.hpp
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MELSPECTROGRAM_HPP
-#define MELSPECTROGRAM_HPP
-
-#include "PlatformMath.hpp"
-
-#include <vector>
-#include <cstdint>
-#include <cmath>
-#include <limits>
-#include <string>
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Mel Spectrogram consolidated parameters */
- class MelSpecParams {
- public:
- float m_samplingFreq;
- uint32_t m_numFbankBins;
- float m_melLoFreq;
- float m_melHiFreq;
- uint32_t m_frameLen;
- uint32_t m_frameLenPadded;
- bool m_useHtkMethod;
-
- /** @brief Constructor */
- MelSpecParams(const float samplingFreq, const uint32_t numFbankBins,
- const float melLoFreq, const float melHiFreq,
- const uint32_t frameLen, const bool useHtkMethod);
-
- MelSpecParams() = delete;
- ~MelSpecParams() = default;
-
- /** @brief String representation of parameters */
- std::string Str() const;
- };
-
- /**
- * @brief Class for Mel Spectrogram feature extraction.
- * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
- * This class is designed to be generic and self-sufficient but
- * certain calculation routines can be overridden to accommodate
- * use-case specific requirements.
- */
- class MelSpectrogram {
-
- public:
- /**
- * @brief Extract Mel Spectrogram for one single small frame of
- * audio data e.g. 640 samples.
- * @param[in] audioData Vector of audio samples to calculate
- * features for.
- * @param[in] trainingMean Value to subtract from the the computed mel spectrogram, default 0.
- * @return Vector of extracted Mel Spectrogram features.
- **/
- std::vector<float> ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean = 0);
-
- /**
- * @brief Constructor
- * @param[in] params Mel Spectrogram parameters
- */
- explicit MelSpectrogram(const MelSpecParams& params);
-
- MelSpectrogram() = delete;
- ~MelSpectrogram() = default;
-
- /** @brief Initialise */
- void Init();
-
- /**
- * @brief Extract Mel Spectrogram features and quantise for one single small
- * frame of audio data e.g. 640 samples.
- * @param[in] audioData Vector of audio samples to calculate
- * features for.
- * @param[in] quantScale quantisation scale.
- * @param[in] quantOffset quantisation offset.
- * @param[in] trainingMean training mean.
- * @return Vector of extracted quantised Mel Spectrogram features.
- **/
- template<typename T>
- std::vector<T> MelSpecComputeQuant(const std::vector<int16_t>& audioData,
- const float quantScale,
- const int quantOffset,
- float trainingMean = 0)
- {
- this->ComputeMelSpec(audioData, trainingMean);
- float minVal = std::numeric_limits<T>::min();
- float maxVal = std::numeric_limits<T>::max();
-
- std::vector<T> melSpecOut(this->m_params.m_numFbankBins);
- const size_t numFbankBins = this->m_params.m_numFbankBins;
-
- /* Quantize to T. */
- for (size_t k = 0; k < numFbankBins; ++k) {
- auto quantizedEnergy = std::round(((this->m_melEnergies[k]) / quantScale) + quantOffset);
- melSpecOut[k] = static_cast<T>(std::min<float>(std::max<float>(quantizedEnergy, minVal), maxVal));
- }
-
- return melSpecOut;
- }
-
- /* Constants */
- static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
- static constexpr float ms_freqStep = 200.0 / 3;
- static constexpr float ms_minLogHz = 1000.0;
- static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
-
- protected:
- /**
- * @brief Project input frequency to Mel Scale.
- * @param[in] freq input frequency in floating point
- * @param[in] useHTKMethod bool to signal if HTK method is to be
- * used for calculation
- * @return Mel transformed frequency in floating point
- **/
- static float MelScale(const float freq,
- const bool useHTKMethod = true);
-
- /**
- * @brief Inverse Mel transform - convert MEL warped frequency
- * back to normal frequency
- * @param[in] melFreq Mel frequency in floating point
- * @param[in] useHTKMethod bool to signal if HTK method is to be
- * used for calculation
- * @return Real world frequency in floating point
- **/
- static float InverseMelScale(const float melFreq,
- const bool useHTKMethod = true);
-
- /**
- * @brief Populates MEL energies after applying the MEL filter
- * bank weights and adding them up to be placed into
- * bins, according to the filter bank's first and last
- * indices (pre-computed for each filter bank element
- * by CreateMelFilterBank function).
- * @param[in] fftVec Vector populated with FFT magnitudes
- * @param[in] melFilterBank 2D Vector with filter bank weights
- * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
- * to be used for each bin.
- * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
- * to be used for each bin.
- * @param[out] melEnergies Pre-allocated vector of MEL energies to be
- * populated.
- * @return true if successful, false otherwise
- */
- virtual bool ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies);
-
- /**
- * @brief Converts the Mel energies for logarithmic scale
- * @param[in,out] melEnergies 1D vector of Mel energies
- **/
- virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
-
- /**
- * @brief Given the low and high Mel values, get the normaliser
- * for weights to be applied when populating the filter
- * bank.
- * @param[in] leftMel low Mel frequency value
- * @param[in] rightMel high Mel frequency value
- * @param[in] useHTKMethod bool to signal if HTK method is to be
- * used for calculation
- * @return Return float value to be applied
- * when populating the filter bank.
- */
- virtual float GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- const bool useHTKMethod);
-
- private:
- MelSpecParams m_params;
- std::vector<float> m_frame;
- std::vector<float> m_buffer;
- std::vector<float> m_melEnergies;
- std::vector<float> m_windowFunc;
- std::vector<std::vector<float>> m_melFilterBank;
- std::vector<uint32_t> m_filterBankFilterFirst;
- std::vector<uint32_t> m_filterBankFilterLast;
- bool m_filterBankInitialised;
- arm::app::math::FftInstance m_fftInstance;
-
- /**
- * @brief Initialises the filter banks.
- **/
- void InitMelFilterBank();
-
- /**
- * @brief Signals whether the instance of MelSpectrogram has had its
- * required buffers initialised
- * @return True if initialised, false otherwise
- **/
- bool IsMelFilterBankInited() const;
-
- /**
- * @brief Create mel filter banks for Mel Spectrogram calculation.
- * @return 2D vector of floats
- **/
- std::vector<std::vector<float>> CreateMelFilterBank();
-
- /**
- * @brief Computes the magnitude from an interleaved complex array
- **/
- void ConvertToPowerSpectrum();
-
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-
-#endif /* MELSPECTROGRAM_HPP */
diff --git a/source/use_case/ad/src/AdMelSpectrogram.cc b/source/use_case/ad/src/AdMelSpectrogram.cc
deleted file mode 100644
index 14b9323..0000000
--- a/source/use_case/ad/src/AdMelSpectrogram.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "AdMelSpectrogram.hpp"
-#include "PlatformMath.hpp"
-#include "log_macros.h"
-
-#include <cfloat>
-
-namespace arm {
-namespace app {
-namespace audio {
-
- bool AdMelSpectrogram::ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies)
- {
- const size_t numBanks = melEnergies.size();
-
- if (numBanks != filterBankFilterFirst.size() ||
- numBanks != filterBankFilterLast.size()) {
- printf_err("unexpected filter bank lengths\n");
- return false;
- }
-
- for (size_t bin = 0; bin < numBanks; ++bin) {
- auto filterBankIter = melFilterBank[bin].begin();
- auto end = melFilterBank[bin].end();
- float melEnergy = FLT_MIN; /* Avoid log of zero at later stages. */
- const uint32_t firstIndex = filterBankFilterFirst[bin];
- const uint32_t lastIndex = std::min<int32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
-
- for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
- melEnergy += (*filterBankIter++ * fftVec[i]);
- }
-
- melEnergies[bin] = melEnergy;
- }
-
- return true;
- }
-
- void AdMelSpectrogram::ConvertToLogarithmicScale(
- std::vector<float>& melEnergies)
- {
- /* Container for natural logarithms of mel energies */
- std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
-
- /* Because we are taking natural logs, we need to multiply by log10(e).
- * Also, for wav2letter model, we scale our log10 values by 10 */
- constexpr float multiplier = 10.0 * /* default scalar */
- 0.4342944819032518; /* log10f(std::exp(1.0))*/
-
- /* Take log of the whole vector */
- math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
-
- /* Scale the log values. */
- for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
- iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
-
- *iterM = *iterL * multiplier;
- }
- }
-
- float AdMelSpectrogram::GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- const bool useHTKMethod)
- {
- /* Slaney normalization for mel weights. */
- return (2.0f / (AdMelSpectrogram::InverseMelScale(rightMel, useHTKMethod) -
- AdMelSpectrogram::InverseMelScale(leftMel, useHTKMethod)));
- }
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/ad/src/AdModel.cc b/source/use_case/ad/src/AdModel.cc
deleted file mode 100644
index a2ef260..0000000
--- a/source/use_case/ad/src/AdModel.cc
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "AdModel.hpp"
-#include "log_macros.h"
-
-const tflite::MicroOpResolver& arm::app::AdModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::AdModel::EnlistOperations()
-{
- this->m_opResolver.AddAveragePool2D();
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddDepthwiseConv2D();
- this->m_opResolver.AddRelu6();
- this->m_opResolver.AddReshape();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::AdModel::ModelPointer()
-{
- return GetModelPointer();
-}
-extern size_t GetModelLen();
-size_t arm::app::AdModel::ModelSize()
-{
- return GetModelLen();
-}
diff --git a/source/use_case/ad/src/AdProcessing.cc b/source/use_case/ad/src/AdProcessing.cc
deleted file mode 100644
index a33131c..0000000
--- a/source/use_case/ad/src/AdProcessing.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "AdProcessing.hpp"
-
-#include "AdModel.hpp"
-
-namespace arm {
-namespace app {
-
-AdPreProcess::AdPreProcess(TfLiteTensor* inputTensor,
- uint32_t melSpectrogramFrameLen,
- uint32_t melSpectrogramFrameStride,
- float adModelTrainingMean):
- m_validInstance{false},
- m_melSpectrogramFrameLen{melSpectrogramFrameLen},
- m_melSpectrogramFrameStride{melSpectrogramFrameStride},
- /**< Model is trained on features downsampled 2x */
- m_inputResizeScale{2},
- /**< We are choosing to move by 20 frames across the audio for each inference. */
- m_numMelSpecVectorsInAudioStride{20},
- m_audioDataStride{m_numMelSpecVectorsInAudioStride * melSpectrogramFrameStride},
- m_melSpec{melSpectrogramFrameLen}
-{
- if (!inputTensor) {
- printf_err("Invalid input tensor provided to pre-process\n");
- return;
- }
-
- TfLiteIntArray* inputShape = inputTensor->dims;
-
- if (!inputShape) {
- printf_err("Invalid input tensor dims\n");
- return;
- }
-
- const uint32_t kNumRows = inputShape->data[AdModel::ms_inputRowsIdx];
- const uint32_t kNumCols = inputShape->data[AdModel::ms_inputColsIdx];
-
- /* Deduce the data length required for 1 inference from the network parameters. */
- this->m_audioDataWindowSize = (((this->m_inputResizeScale * kNumCols) - 1) *
- melSpectrogramFrameStride) +
- melSpectrogramFrameLen;
- this->m_numReusedFeatureVectors = kNumRows -
- (this->m_numMelSpecVectorsInAudioStride /
- this->m_inputResizeScale);
- this->m_melSpec.Init();
-
- /* Creating a Mel Spectrogram sliding window for the data required for 1 inference.
- * "resizing" done here by multiplying stride by resize scale. */
- this->m_melWindowSlider = audio::SlidingWindow<const int16_t>(
- nullptr, /* to be populated later. */
- this->m_audioDataWindowSize,
- melSpectrogramFrameLen,
- melSpectrogramFrameStride * this->m_inputResizeScale);
-
- /* Construct feature calculation function. */
- this->m_featureCalc = GetFeatureCalculator(this->m_melSpec, inputTensor,
- this->m_numReusedFeatureVectors,
- adModelTrainingMean);
- this->m_validInstance = true;
-}
-
-bool AdPreProcess::DoPreProcess(const void* input, size_t inputSize)
-{
- /* Check that we have a valid instance. */
- if (!this->m_validInstance) {
- printf_err("Invalid pre-processor instance\n");
- return false;
- }
-
- /* We expect that we can traverse the size with which the MEL spectrogram
- * sliding window was initialised. */
- if (!input || inputSize < this->m_audioDataWindowSize) {
- printf_err("Invalid input provided for pre-processing\n");
- return false;
- }
-
- /* We moved to the next window - set the features sliding to the new address. */
- this->m_melWindowSlider.Reset(static_cast<const int16_t*>(input));
-
- /* The first window does not have cache ready. */
- const bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedFeatureVectors > 0;
-
- /* Start calculating features inside one audio sliding window. */
- while (this->m_melWindowSlider.HasNext()) {
- const int16_t* melSpecWindow = this->m_melWindowSlider.Next();
- std::vector<int16_t> melSpecAudioData = std::vector<int16_t>(
- melSpecWindow,
- melSpecWindow + this->m_melSpectrogramFrameLen);
-
- /* Compute features for this window and write them to input tensor. */
- this->m_featureCalc(melSpecAudioData,
- this->m_melWindowSlider.Index(),
- useCache,
- this->m_numMelSpecVectorsInAudioStride,
- this->m_inputResizeScale);
- }
-
- return true;
-}
-
-uint32_t AdPreProcess::GetAudioWindowSize()
-{
- return this->m_audioDataWindowSize;
-}
-
-uint32_t AdPreProcess::GetAudioDataStride()
-{
- return this->m_audioDataStride;
-}
-
-void AdPreProcess::SetAudioWindowIndex(uint32_t idx)
-{
- this->m_audioWindowIndex = idx;
-}
-
-AdPostProcess::AdPostProcess(TfLiteTensor* outputTensor) :
- m_outputTensor {outputTensor}
-{}
-
-bool AdPostProcess::DoPostProcess()
-{
- switch (this->m_outputTensor->type) {
- case kTfLiteInt8:
- this->Dequantize<int8_t>();
- break;
- default:
- printf_err("Unsupported tensor type");
- return false;
- }
-
- math::MathUtils::SoftmaxF32(this->m_dequantizedOutputVec);
- return true;
-}
-
-float AdPostProcess::GetOutputValue(uint32_t index)
-{
- if (index < this->m_dequantizedOutputVec.size()) {
- return this->m_dequantizedOutputVec[index];
- }
- printf_err("Invalid index for output\n");
- return 0.0;
-}
-
-std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
-GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
- TfLiteTensor* inputTensor,
- size_t cacheSize,
- float trainingMean)
-{
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)> melSpecFeatureCalc;
-
- TfLiteQuantization quant = inputTensor->quantization;
-
- if (kTfLiteAffineQuantization == quant.type) {
-
- auto* quantParams = static_cast<TfLiteAffineQuantization*>(quant.params);
- const float quantScale = quantParams->scale->data[0];
- const int quantOffset = quantParams->zero_point->data[0];
-
- switch (inputTensor->type) {
- case kTfLiteInt8: {
- melSpecFeatureCalc = FeatureCalc<int8_t>(
- inputTensor,
- cacheSize,
- [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
- return melSpec.MelSpecComputeQuant<int8_t>(
- audioDataWindow,
- quantScale,
- quantOffset,
- trainingMean);
- }
- );
- break;
- }
- default:
- printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
- }
- } else {
- melSpecFeatureCalc = FeatureCalc<float>(
- inputTensor,
- cacheSize,
- [=, &melSpec](
- std::vector<int16_t>& audioDataWindow) {
- return melSpec.ComputeMelSpec(
- audioDataWindow,
- trainingMean);
- });
- }
- return melSpecFeatureCalc;
-}
-
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/ad/src/MainLoop.cc b/source/use_case/ad/src/MainLoop.cc
index 140359b..e9f7b4e 100644
--- a/source/use_case/ad/src/MainLoop.cc
+++ b/source/use_case/ad/src/MainLoop.cc
@@ -18,7 +18,17 @@
#include "AdModel.hpp" /* Model class for running inference */
#include "UseCaseCommonUtils.hpp" /* Utils functions */
#include "UseCaseHandler.hpp" /* Handlers for different user options */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+ namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ } /* namespace app */
+} /* namespace arm */
+
+extern uint8_t* GetModelPointer();
+extern size_t GetModelLen();
enum opcodes
{
@@ -49,12 +59,23 @@ void main_loop()
arm::app::AdModel model; /* Model wrapper object. */
/* Load the model. */
- if (!model.Init())
+ if (!model.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ GetModelPointer(),
+ GetModelLen()))
{
printf_err("failed to initialise model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not an NPU build, check if the model contains an NPU operator */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
diff --git a/source/use_case/ad/src/MelSpectrogram.cc b/source/use_case/ad/src/MelSpectrogram.cc
deleted file mode 100644
index ff0c536..0000000
--- a/source/use_case/ad/src/MelSpectrogram.cc
+++ /dev/null
@@ -1,316 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "MelSpectrogram.hpp"
-
-#include "PlatformMath.hpp"
-#include "log_macros.h"
-
-#include <cfloat>
-#include <cinttypes>
-
-namespace arm {
-namespace app {
-namespace audio {
-
- MelSpecParams::MelSpecParams(
- const float samplingFreq,
- const uint32_t numFbankBins,
- const float melLoFreq,
- const float melHiFreq,
- const uint32_t frameLen,
- const bool useHtkMethod):
- m_samplingFreq(samplingFreq),
- m_numFbankBins(numFbankBins),
- m_melLoFreq(melLoFreq),
- m_melHiFreq(melHiFreq),
- m_frameLen(frameLen),
-
- /* Smallest power of 2 >= frame length. */
- m_frameLenPadded(pow(2, ceil((log(frameLen)/log(2))))),
- m_useHtkMethod(useHtkMethod)
- {}
-
- std::string MelSpecParams::Str() const
- {
- char strC[1024];
- snprintf(strC, sizeof(strC) - 1, "\n \
- \n\t Sampling frequency: %f\
- \n\t Number of filter banks: %" PRIu32 "\
- \n\t Mel frequency limit (low): %f\
- \n\t Mel frequency limit (high): %f\
- \n\t Frame length: %" PRIu32 "\
- \n\t Padded frame length: %" PRIu32 "\
- \n\t Using HTK for Mel scale: %s\n",
- this->m_samplingFreq, this->m_numFbankBins, this->m_melLoFreq,
- this->m_melHiFreq, this->m_frameLen,
- this->m_frameLenPadded, this->m_useHtkMethod ? "yes" : "no");
- return std::string{strC};
- }
-
- MelSpectrogram::MelSpectrogram(const MelSpecParams& params):
- m_params(params),
- m_filterBankInitialised(false)
- {
- this->m_buffer = std::vector<float>(
- this->m_params.m_frameLenPadded, 0.0);
- this->m_frame = std::vector<float>(
- this->m_params.m_frameLenPadded, 0.0);
- this->m_melEnergies = std::vector<float>(
- this->m_params.m_numFbankBins, 0.0);
-
- this->m_windowFunc = std::vector<float>(this->m_params.m_frameLen);
- const auto multiplier = static_cast<float>(2 * M_PI / this->m_params.m_frameLen);
-
- /* Create window function. */
- for (size_t i = 0; i < this->m_params.m_frameLen; ++i) {
- this->m_windowFunc[i] = (0.5 - (0.5 *
- math::MathUtils::CosineF32(static_cast<float>(i) * multiplier)));
- }
-
- math::MathUtils::FftInitF32(this->m_params.m_frameLenPadded, this->m_fftInstance);
- debug("Instantiated Mel Spectrogram object: %s\n", this->m_params.Str().c_str());
- }
-
- void MelSpectrogram::Init()
- {
- this->InitMelFilterBank();
- }
-
- float MelSpectrogram::MelScale(const float freq, const bool useHTKMethod)
- {
- if (useHTKMethod) {
- return 1127.0f * logf (1.0f + freq / 700.0f);
- } else {
- /* Slaney formula for mel scale. */
- float mel = freq / ms_freqStep;
-
- if (freq >= ms_minLogHz) {
- mel = ms_minLogMel + logf(freq / ms_minLogHz) / ms_logStep;
- }
- return mel;
- }
- }
-
- float MelSpectrogram::InverseMelScale(const float melFreq, const bool useHTKMethod)
- {
- if (useHTKMethod) {
- return 700.0f * (expf (melFreq / 1127.0f) - 1.0f);
- } else {
- /* Slaney formula for inverse mel scale. */
- float freq = ms_freqStep * melFreq;
-
- if (melFreq >= ms_minLogMel) {
- freq = ms_minLogHz * expf(ms_logStep * (melFreq - ms_minLogMel));
- }
- return freq;
- }
- }
-
- bool MelSpectrogram::ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies)
- {
- const size_t numBanks = melEnergies.size();
-
- if (numBanks != filterBankFilterFirst.size() ||
- numBanks != filterBankFilterLast.size()) {
- printf_err("unexpected filter bank lengths\n");
- return false;
- }
-
- for (size_t bin = 0; bin < numBanks; ++bin) {
- auto filterBankIter = melFilterBank[bin].begin();
- auto end = melFilterBank[bin].end();
- float melEnergy = FLT_MIN; /* Avoid log of zero at later stages */
- const uint32_t firstIndex = filterBankFilterFirst[bin];
- const uint32_t lastIndex = std::min<int32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
-
- for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
- float energyRep = math::MathUtils::SqrtF32(fftVec[i]);
- melEnergy += (*filterBankIter++ * energyRep);
- }
-
- melEnergies[bin] = melEnergy;
- }
-
- return true;
- }
-
- void MelSpectrogram::ConvertToLogarithmicScale(std::vector<float>& melEnergies)
- {
- for (float& melEnergy : melEnergies) {
- melEnergy = logf(melEnergy);
- }
- }
-
- void MelSpectrogram::ConvertToPowerSpectrum()
- {
- const uint32_t halfDim = this->m_buffer.size() / 2;
-
- /* Handle this special case. */
- float firstEnergy = this->m_buffer[0] * this->m_buffer[0];
- float lastEnergy = this->m_buffer[1] * this->m_buffer[1];
-
- math::MathUtils::ComplexMagnitudeSquaredF32(
- this->m_buffer.data(),
- this->m_buffer.size(),
- this->m_buffer.data(),
- this->m_buffer.size()/2);
-
- this->m_buffer[0] = firstEnergy;
- this->m_buffer[halfDim] = lastEnergy;
- }
-
- float MelSpectrogram::GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- const bool useHTKMethod)
- {
- UNUSED(leftMel);
- UNUSED(rightMel);
- UNUSED(useHTKMethod);
-
- /* By default, no normalisation => return 1 */
- return 1.f;
- }
-
- void MelSpectrogram::InitMelFilterBank()
- {
- if (!this->IsMelFilterBankInited()) {
- this->m_melFilterBank = this->CreateMelFilterBank();
- this->m_filterBankInitialised = true;
- }
- }
-
- bool MelSpectrogram::IsMelFilterBankInited() const
- {
- return this->m_filterBankInitialised;
- }
-
- std::vector<float> MelSpectrogram::ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean)
- {
- this->InitMelFilterBank();
-
- /* TensorFlow way of normalizing .wav data to (-1, 1). */
- constexpr float normaliser = 1.0/(1<<15);
- for (size_t i = 0; i < this->m_params.m_frameLen; ++i) {
- this->m_frame[i] = static_cast<float>(audioData[i]) * normaliser;
- }
-
- /* Apply window function to input frame. */
- for(size_t i = 0; i < this->m_params.m_frameLen; ++i) {
- this->m_frame[i] *= this->m_windowFunc[i];
- }
-
- /* Set remaining frame values to 0. */
- std::fill(this->m_frame.begin() + this->m_params.m_frameLen,this->m_frame.end(), 0);
-
- /* Compute FFT. */
- math::MathUtils::FftF32(this->m_frame, this->m_buffer, this->m_fftInstance);
-
- /* Convert to power spectrum. */
- this->ConvertToPowerSpectrum();
-
- /* Apply mel filterbanks. */
- if (!this->ApplyMelFilterBank(this->m_buffer,
- this->m_melFilterBank,
- this->m_filterBankFilterFirst,
- this->m_filterBankFilterLast,
- this->m_melEnergies)) {
- printf_err("Failed to apply MEL filter banks\n");
- }
-
- /* Convert to logarithmic scale */
- this->ConvertToLogarithmicScale(this->m_melEnergies);
-
- /* Perform mean subtraction. */
- for (auto& energy:this->m_melEnergies) {
- energy -= trainingMean;
- }
-
- return this->m_melEnergies;
- }
-
- std::vector<std::vector<float>> MelSpectrogram::CreateMelFilterBank()
- {
- size_t numFftBins = this->m_params.m_frameLenPadded / 2;
- float fftBinWidth = static_cast<float>(this->m_params.m_samplingFreq) / this->m_params.m_frameLenPadded;
-
- float melLowFreq = MelSpectrogram::MelScale(this->m_params.m_melLoFreq,
- this->m_params.m_useHtkMethod);
- float melHighFreq = MelSpectrogram::MelScale(this->m_params.m_melHiFreq,
- this->m_params.m_useHtkMethod);
- float melFreqDelta = (melHighFreq - melLowFreq) / (this->m_params.m_numFbankBins + 1);
-
- std::vector<float> thisBin = std::vector<float>(numFftBins);
- std::vector<std::vector<float>> melFilterBank(
- this->m_params.m_numFbankBins);
- this->m_filterBankFilterFirst =
- std::vector<uint32_t>(this->m_params.m_numFbankBins);
- this->m_filterBankFilterLast =
- std::vector<uint32_t>(this->m_params.m_numFbankBins);
-
- for (size_t bin = 0; bin < this->m_params.m_numFbankBins; bin++) {
- float leftMel = melLowFreq + bin * melFreqDelta;
- float centerMel = melLowFreq + (bin + 1) * melFreqDelta;
- float rightMel = melLowFreq + (bin + 2) * melFreqDelta;
-
- uint32_t firstIndex = 0;
- uint32_t lastIndex = 0;
- bool firstIndexFound = false;
- const float normaliser = this->GetMelFilterBankNormaliser(leftMel, rightMel, this->m_params.m_useHtkMethod);
-
- for (size_t i = 0; i < numFftBins; ++i) {
- float freq = (fftBinWidth * i); /* Center freq of this fft bin. */
- float mel = MelSpectrogram::MelScale(freq, this->m_params.m_useHtkMethod);
- thisBin[i] = 0.0;
-
- if (mel > leftMel && mel < rightMel) {
- float weight;
- if (mel <= centerMel) {
- weight = (mel - leftMel) / (centerMel - leftMel);
- } else {
- weight = (rightMel - mel) / (rightMel - centerMel);
- }
-
- thisBin[i] = weight * normaliser;
- if (!firstIndexFound) {
- firstIndex = i;
- firstIndexFound = true;
- }
- lastIndex = i;
- }
- }
-
- this->m_filterBankFilterFirst[bin] = firstIndex;
- this->m_filterBankFilterLast[bin] = lastIndex;
-
- /* Copy the part we care about. */
- for (uint32_t i = firstIndex; i <= lastIndex; ++i) {
- melFilterBank[bin].push_back(thisBin[i]);
- }
- }
-
- return melFilterBank;
- }
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/ad/usecase.cmake b/source/use_case/ad/usecase.cmake
index 23b4c32..06d7681 100644
--- a/source/use_case/ad/usecase.cmake
+++ b/source/use_case/ad/usecase.cmake
@@ -15,6 +15,9 @@
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "ad")
+
USER_OPTION(${use_case}_FILE_PATH "Directory with custom WAV input files, or path to a single input WAV file, to use in the evaluation application."
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
PATH_OR_FILE)
diff --git a/source/use_case/asr/include/AsrClassifier.hpp b/source/use_case/asr/include/AsrClassifier.hpp
deleted file mode 100644
index a07a721..0000000
--- a/source/use_case/asr/include/AsrClassifier.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_CLASSIFIER_HPP
-#define ASR_CLASSIFIER_HPP
-
-#include "Classifier.hpp"
-
-namespace arm {
-namespace app {
-
- class AsrClassifier : public Classifier {
- public:
- /**
- * @brief Gets the top N classification results from the
- * output vector.
- * @param[in] outputTensor Inference output tensor from an NN model.
- * @param[out] vecResults A vector of classification results
- * populated by this function.
- * @param[in] labels Labels vector to match classified classes
- * @param[in] topNCount Number of top classifications to pick.
- * @param[in] use_softmax Whether softmax scaling should be applied to model output.
- * @return true if successful, false otherwise.
- **/
- bool GetClassificationResults(TfLiteTensor* outputTensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector<std::string>& labels,
- uint32_t topNCount, bool use_softmax = false) override;
-
- private:
- /**
- * @brief Utility function that gets the top 1 classification results from the
- * output tensor (vector of vector).
- * @param[in] tensor Inference output tensor from an NN model.
- * @param[out] vecResults Vector of classification results populated by this function.
- * @param[in] labels Labels vector to match classified classes.
- * @param[in] scale Quantization scale.
- * @param[in] zeroPoint Quantization zero point.
- * @return true if successful, false otherwise.
- **/
- template<typename T>
- bool GetTopResults(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector<std::string>& labels, double scale, double zeroPoint);
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_CLASSIFIER_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/AsrResult.hpp b/source/use_case/asr/include/AsrResult.hpp
deleted file mode 100644
index ed826d0..0000000
--- a/source/use_case/asr/include/AsrResult.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_RESULT_HPP
-#define ASR_RESULT_HPP
-
-#include "ClassificationResult.hpp"
-
-#include <vector>
-
-namespace arm {
-namespace app {
-namespace asr {
-
- using ResultVec = std::vector<arm::app::ClassificationResult>;
-
- /* Structure for holding ASR result. */
- class AsrResult {
-
- public:
- ResultVec m_resultVec; /* Container for "thresholded" classification results. */
- float m_timeStamp; /* Audio timestamp for this result. */
- uint32_t m_inferenceNumber; /* Corresponding inference number. */
- float m_threshold; /* Threshold value for `m_resultVec.` */
-
- AsrResult() = delete;
- AsrResult(ResultVec& resultVec,
- const float timestamp,
- const uint32_t inferenceIdx,
- const float scoreThreshold) {
-
- this->m_threshold = scoreThreshold;
- this->m_timeStamp = timestamp;
- this->m_inferenceNumber = inferenceIdx;
-
- this->m_resultVec = ResultVec();
- for (auto& i : resultVec) {
- if (i.m_normalisedVal >= this->m_threshold) {
- this->m_resultVec.emplace_back(i);
- }
- }
- }
- ~AsrResult() = default;
- };
-
-} /* namespace asr */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/OutputDecode.hpp b/source/use_case/asr/include/OutputDecode.hpp
deleted file mode 100644
index 9d39057..0000000
--- a/source/use_case/asr/include/OutputDecode.hpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_OUTPUT_DECODE_HPP
-#define ASR_OUTPUT_DECODE_HPP
-
-#include "AsrClassifier.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-namespace asr {
-
- /**
- * @brief Decodes the classifier's label output into a
- * string.
- * @param[in] vecResults Label output from classifier.
- * @return Decoded output as a std::string.
- **/
- std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults);
-
-} /* namespace asr */
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_OUTPUT_DECODE_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/Wav2LetterMfcc.hpp b/source/use_case/asr/include/Wav2LetterMfcc.hpp
deleted file mode 100644
index b5a21d3..0000000
--- a/source/use_case/asr/include/Wav2LetterMfcc.hpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_WAV2LETTER_MFCC_HPP
-#define ASR_WAV2LETTER_MFCC_HPP
-
-#include "Mfcc.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Class to provide Wav2Letter specific MFCC calculation requirements. */
- class Wav2LetterMFCC : public MFCC {
-
- public:
- static constexpr uint32_t ms_defaultSamplingFreq = 16000;
- static constexpr uint32_t ms_defaultNumFbankBins = 128;
- static constexpr uint32_t ms_defaultMelLoFreq = 0;
- static constexpr uint32_t ms_defaultMelHiFreq = 8000;
- static constexpr bool ms_defaultUseHtkMethod = false;
-
- explicit Wav2LetterMFCC(const size_t numFeats, const size_t frameLen)
- : MFCC(MfccParams(
- ms_defaultSamplingFreq, ms_defaultNumFbankBins,
- ms_defaultMelLoFreq, ms_defaultMelHiFreq,
- numFeats, frameLen, ms_defaultUseHtkMethod))
- {}
-
- Wav2LetterMFCC() = delete;
- ~Wav2LetterMFCC() = default;
-
- protected:
-
- /**
- * @brief Overrides base class implementation of this function.
- * @param[in] fftVec Vector populated with FFT magnitudes
- * @param[in] melFilterBank 2D Vector with filter bank weights
- * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
- * to be used for each bin.
- * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
- * to be used for each bin.
- * @param[out] melEnergies Pre-allocated vector of MEL energies to be
- * populated.
- * @return true if successful, false otherwise
- */
- bool ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies) override;
-
- /**
- * @brief Override for the base class implementation to convert mel
- * energies to logarithmic scale. The difference from
- * default behaviour is that the power is converted to dB
- * and subsequently clamped.
- * @param[in,out] melEnergies 1D vector of Mel energies
- **/
- void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override;
-
- /**
- * @brief Create a matrix used to calculate Discrete Cosine
- * Transform. Override for the base class' default
- * implementation as the first and last elements
- * use a different normaliser.
- * @param[in] inputLength input length of the buffer on which
- * DCT will be performed
- * @param[in] coefficientCount Total coefficients per input length.
- * @return 1D vector with inputLength x coefficientCount elements
- * populated with DCT coefficients.
- */
- std::vector<float> CreateDCTMatrix(int32_t inputLength,
- int32_t coefficientCount) override;
-
- /**
- * @brief Given the low and high Mel values, get the normaliser
- * for weights to be applied when populating the filter
- * bank. Override for the base class implementation.
- * @param[in] leftMel Low Mel frequency value.
- * @param[in] rightMel High Mel frequency value.
- * @param[in] useHTKMethod bool to signal if HTK method is to be
- * used for calculation.
- * @return Value to use for normalising.
- */
- float GetMelFilterBankNormaliser(const float& leftMel,
- const float& rightMel,
- bool useHTKMethod) override;
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_WAV2LETTER_MFCC_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/Wav2LetterModel.hpp b/source/use_case/asr/include/Wav2LetterModel.hpp
deleted file mode 100644
index bec70ab..0000000
--- a/source/use_case/asr/include/Wav2LetterModel.hpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_WAV2LETTER_MODEL_HPP
-#define ASR_WAV2LETTER_MODEL_HPP
-
-#include "Model.hpp"
-
-extern const int g_FrameLength;
-extern const int g_FrameStride;
-extern const float g_ScoreThreshold;
-extern const int g_ctxLen;
-
-namespace arm {
-namespace app {
-
- class Wav2LetterModel : public Model {
-
- public:
- /* Indices for the expected model - based on input and output tensor shapes */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_outputRowsIdx = 2;
- static constexpr uint32_t ms_outputColsIdx = 3;
-
- /* Model specific constants. */
- static constexpr uint32_t ms_blankTokenIdx = 28;
- static constexpr uint32_t ms_numMfccFeatures = 13;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 5;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_WAV2LETTER_MODEL_HPP */
diff --git a/source/use_case/asr/include/Wav2LetterPostprocess.hpp b/source/use_case/asr/include/Wav2LetterPostprocess.hpp
deleted file mode 100644
index 446014d..0000000
--- a/source/use_case/asr/include/Wav2LetterPostprocess.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_WAV2LETTER_POSTPROCESS_HPP
-#define ASR_WAV2LETTER_POSTPROCESS_HPP
-
-#include "TensorFlowLiteMicro.hpp" /* TensorFlow headers. */
-#include "BaseProcessing.hpp"
-#include "AsrClassifier.hpp"
-#include "AsrResult.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Helper class to manage tensor post-processing for "wav2letter"
- * output.
- */
- class AsrPostProcess : public BasePostProcess {
- public:
- bool m_lastIteration = false; /* Flag to set if processing the last set of data for a clip. */
-
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Object used to get top N results from classification.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[in/out] result Vector of classification results to store decoded outputs.
- * @param[in] outputContextLen Left/right context length for output tensor.
- * @param[in] blankTokenIdx Index in the labels that the "Blank token" takes.
- * @param[in] reductionAxis The axis that the logits of each time step is on.
- **/
- AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
- const std::vector<std::string>& labels, asr::ResultVec& result,
- uint32_t outputContextLen,
- uint32_t blankTokenIdx, uint32_t reductionAxis);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate ASR result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
-
- /** @brief Gets the output inner length for post-processing. */
- static uint32_t GetOutputInnerLen(const TfLiteTensor*, uint32_t outputCtxLen);
-
- /** @brief Gets the output context length (left/right) for post-processing. */
- static uint32_t GetOutputContextLen(const Model& model, uint32_t inputCtxLen);
-
- /** @brief Gets the number of feature vectors to be computed. */
- static uint32_t GetNumFeatureVectors(const Model& model);
-
- private:
- AsrClassifier& m_classifier; /* ASR Classifier object. */
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- const std::vector<std::string>& m_labels; /* ASR Labels. */
- asr::ResultVec & m_results; /* Results vector for a single inference. */
- uint32_t m_outputContextLen; /* lengths of left/right contexts for output. */
- uint32_t m_outputInnerLen; /* Length of output inner context. */
- uint32_t m_totalLen; /* Total length of the required axis. */
- uint32_t m_countIterations; /* Current number of iterations. */
- uint32_t m_blankTokenIdx; /* Index of the labels blank token. */
- uint32_t m_reductionAxisIdx; /* Axis containing output logits for a single step. */
-
- /**
- * @brief Checks if the tensor and axis index are valid
- * inputs to the object - based on how it has been initialised.
- * @return true if valid, false otherwise.
- */
- bool IsInputValid(TfLiteTensor* tensor,
- uint32_t axisIdx) const;
-
- /**
- * @brief Gets the tensor data element size in bytes based
- * on the tensor type.
- * @return Size in bytes, 0 if not supported.
- */
- static uint32_t GetTensorElementSize(TfLiteTensor* tensor);
-
- /**
- * @brief Erases sections from the data assuming row-wise
- * arrangement along the context axis.
- * @return true if successful, false otherwise.
- */
- bool EraseSectionsRowWise(uint8_t* ptrData,
- uint32_t strideSzBytes,
- bool lastIteration);
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_WAV2LETTER_POSTPROCESS_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/include/Wav2LetterPreprocess.hpp b/source/use_case/asr/include/Wav2LetterPreprocess.hpp
deleted file mode 100644
index dc9a415..0000000
--- a/source/use_case/asr/include/Wav2LetterPreprocess.hpp
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_WAV2LETTER_PREPROCESS_HPP
-#define ASR_WAV2LETTER_PREPROCESS_HPP
-
-#include "Wav2LetterModel.hpp"
-#include "Wav2LetterMfcc.hpp"
-#include "AudioUtils.hpp"
-#include "DataStructures.hpp"
-#include "BaseProcessing.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /* Class to facilitate pre-processing calculation for Wav2Letter model
- * for ASR. */
- using AudioWindow = audio::SlidingWindow<const int16_t>;
-
- class AsrPreProcess : public BasePreProcess {
- public:
- /**
- * @brief Constructor.
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] numMfccFeatures Number of MFCC features per window.
- * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
- * for an inference.
- * @param[in] mfccWindowLen Number of audio elements to calculate MFCC features per window.
- * @param[in] mfccWindowStride Stride (in number of elements) for moving the MFCC window.
- */
- AsrPreProcess(TfLiteTensor* inputTensor,
- uint32_t numMfccFeatures,
- uint32_t numFeatureFrames,
- uint32_t mfccWindowLen,
- uint32_t mfccWindowStride);
-
- /**
- * @brief Calculates the features required from audio data. This
- * includes MFCC, first and second order deltas,
- * normalisation and finally, quantisation. The tensor is
- * populated with features from a given window placed along
- * in a single row.
- * @param[in] audioData Pointer to the first element of audio data.
- * @param[in] audioDataLen Number of elements in the audio data.
- * @return true if successful, false in case of error.
- */
- bool DoPreProcess(const void* audioData, size_t audioDataLen) override;
-
- protected:
- /**
- * @brief Computes the first and second order deltas for the
- * MFCC buffers - they are assumed to be populated.
- *
- * @param[in] mfcc MFCC buffers.
- * @param[out] delta1 Result of the first diff computation.
- * @param[out] delta2 Result of the second diff computation.
- * @return true if successful, false otherwise.
- */
- static bool ComputeDeltas(Array2d<float>& mfcc,
- Array2d<float>& delta1,
- Array2d<float>& delta2);
-
- /**
- * @brief Given a 2D vector of floats, rescale it to have mean of 0 and
- * standard deviation of 1.
- * @param[in,out] vec Vector of vector of floats.
- */
- static void StandardizeVecF32(Array2d<float>& vec);
-
- /**
- * @brief Standardizes all the MFCC and delta buffers to have mean 0 and std. dev 1.
- */
- void Standarize();
-
- /**
- * @brief Given the quantisation and data type limits, computes
- * the quantised values of a floating point input data.
- * @param[in] elem Element to be quantised.
- * @param[in] quantScale Scale.
- * @param[in] quantOffset Offset.
- * @param[in] minVal Numerical limit - minimum.
- * @param[in] maxVal Numerical limit - maximum.
- * @return Floating point quantised value.
- */
- static float GetQuantElem(
- float elem,
- float quantScale,
- int quantOffset,
- float minVal,
- float maxVal);
-
- /**
- * @brief Quantises the MFCC and delta buffers, and places them
- * in the output buffer. While doing so, it transposes
- * the data. Reason: Buffers in this class are arranged
- * for "time" axis to be row major. Primary reason for
- * this being the convolution speed up (as we can use
- * contiguous memory). The output, however, requires the
- * time axis to be in column major arrangement.
- * @param[in] outputBuf Pointer to the output buffer.
- * @param[in] outputBufSz Output buffer's size.
- * @param[in] quantScale Quantisation scale.
- * @param[in] quantOffset Quantisation offset.
- */
- template <typename T>
- bool Quantise(
- T* outputBuf,
- const uint32_t outputBufSz,
- const float quantScale,
- const int quantOffset)
- {
- /* Check the output size will fit everything. */
- if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {
- printf_err("Tensor size too small for features\n");
- return false;
- }
-
- /* Populate. */
- T* outputBufMfcc = outputBuf;
- T* outputBufD1 = outputBuf + this->m_numMfccFeats;
- T* outputBufD2 = outputBufD1 + this->m_numMfccFeats;
- const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* (3 vectors - 1 vector) */
-
- const float minVal = std::numeric_limits<T>::min();
- const float maxVal = std::numeric_limits<T>::max();
-
- /* Need to transpose while copying and concatenating the tensor. */
- for (uint32_t j = 0; j < this->m_numFeatureFrames; ++j) {
- for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {
- *outputBufMfcc++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_mfccBuf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- *outputBufD1++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_delta1Buf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- *outputBufD2++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_delta2Buf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- }
- outputBufMfcc += ptrIncr;
- outputBufD1 += ptrIncr;
- outputBufD2 += ptrIncr;
- }
-
- return true;
- }
-
- private:
- audio::Wav2LetterMFCC m_mfcc; /* MFCC instance. */
- TfLiteTensor* m_inputTensor; /* Model input tensor. */
-
- /* Actual buffers to be populated. */
- Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */
- Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
- Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
-
- uint32_t m_mfccWindowLen; /* Window length for MFCC. */
- uint32_t m_mfccWindowStride; /* Window stride len for MFCC. */
- uint32_t m_numMfccFeats; /* Number of MFCC features per window. */
- uint32_t m_numFeatureFrames; /* How many sets of m_numMfccFeats. */
- AudioWindow m_mfccSlidingWindow; /* Sliding window to calculate MFCCs. */
-
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_WAV2LETTER_PREPROCESS_HPP */ \ No newline at end of file
diff --git a/source/use_case/asr/src/AsrClassifier.cc b/source/use_case/asr/src/AsrClassifier.cc
deleted file mode 100644
index 4ba8c7b..0000000
--- a/source/use_case/asr/src/AsrClassifier.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "AsrClassifier.hpp"
-
-#include "log_macros.h"
-#include "TensorFlowLiteMicro.hpp"
-#include "Wav2LetterModel.hpp"
-
-namespace arm {
-namespace app {
-
- template<typename T>
- bool AsrClassifier::GetTopResults(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint)
- {
- const uint32_t nElems = tensor->dims->data[Wav2LetterModel::ms_outputRowsIdx];
- const uint32_t nLetters = tensor->dims->data[Wav2LetterModel::ms_outputColsIdx];
-
- if (nLetters != labels.size()) {
- printf("Output size doesn't match the labels' size\n");
- return false;
- }
-
- /* NOTE: tensor's size verification against labels should be
- * checked by the calling/public function. */
- if (nLetters < 1) {
- return false;
- }
-
- /* Final results' container. */
- vecResults = std::vector<ClassificationResult>(nElems);
-
- T* tensorData = tflite::GetTensorData<T>(tensor);
-
- /* Get the top 1 results. */
- for (uint32_t i = 0, row = 0; i < nElems; ++i, row+=nLetters) {
- std::pair<T, uint32_t> top_1 = std::make_pair(tensorData[row + 0], 0);
-
- for (uint32_t j = 1; j < nLetters; ++j) {
- if (top_1.first < tensorData[row + j]) {
- top_1.first = tensorData[row + j];
- top_1.second = j;
- }
- }
-
- double score = static_cast<int> (top_1.first);
- vecResults[i].m_normalisedVal = scale * (score - zeroPoint);
- vecResults[i].m_label = labels[top_1.second];
- vecResults[i].m_labelIdx = top_1.second;
- }
-
- return true;
- }
- template bool AsrClassifier::GetTopResults<uint8_t>(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels,
- double scale, double zeroPoint);
- template bool AsrClassifier::GetTopResults<int8_t>(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels,
- double scale, double zeroPoint);
-
- bool AsrClassifier::GetClassificationResults(
- TfLiteTensor* outputTensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, uint32_t topNCount, bool use_softmax)
- {
- UNUSED(use_softmax);
- vecResults.clear();
-
- constexpr int minTensorDims = static_cast<int>(
- (Wav2LetterModel::ms_outputRowsIdx > Wav2LetterModel::ms_outputColsIdx)?
- Wav2LetterModel::ms_outputRowsIdx : Wav2LetterModel::ms_outputColsIdx);
-
- constexpr uint32_t outColsIdx = Wav2LetterModel::ms_outputColsIdx;
-
- /* Sanity checks. */
- if (outputTensor == nullptr) {
- printf_err("Output vector is null pointer.\n");
- return false;
- } else if (outputTensor->dims->size < minTensorDims) {
- printf_err("Output tensor expected to be %dD\n", minTensorDims);
- return false;
- } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
- printf_err("Output vectors are smaller than %" PRIu32 "\n", topNCount);
- return false;
- } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
- printf("Output size doesn't match the labels' size\n");
- return false;
- }
-
- if (topNCount != 1) {
- warn("TopNCount value ignored in this implementation\n");
- }
-
- /* To return the floating point values, we need quantization parameters. */
- QuantParams quantParams = GetTensorQuantParams(outputTensor);
-
- bool resultState;
-
- switch (outputTensor->type) {
- case kTfLiteUInt8:
- resultState = this->GetTopResults<uint8_t>(
- outputTensor, vecResults,
- labels, quantParams.scale,
- quantParams.offset);
- break;
- case kTfLiteInt8:
- resultState = this->GetTopResults<int8_t>(
- outputTensor, vecResults,
- labels, quantParams.scale,
- quantParams.offset);
- break;
- default:
- printf_err("Tensor type %s not supported by classifier\n",
- TfLiteTypeGetName(outputTensor->type));
- return false;
- }
-
- if (!resultState) {
- printf_err("Failed to get sorted set\n");
- return false;
- }
-
- return true;
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc
index a1a9540..7acd319 100644
--- a/source/use_case/asr/src/MainLoop.cc
+++ b/source/use_case/asr/src/MainLoop.cc
@@ -20,7 +20,18 @@
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
#include "AsrClassifier.hpp" /* Classifier. */
#include "InputFiles.hpp" /* Generated audio clip header. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+namespace app {
+namespace asr {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ extern uint8_t* GetModelPointer();
+ extern size_t GetModelLen();
+} /* namespace asr */
+} /* namespace app */
+} /* namespace arm */
enum opcodes
{
@@ -53,7 +64,10 @@ void main_loop()
arm::app::Wav2LetterModel model; /* Model wrapper object. */
/* Load the model. */
- if (!model.Init()) {
+ if (!model.Init(arm::app::asr::tensorArena,
+ sizeof(arm::app::asr::tensorArena),
+ arm::app::asr::GetModelPointer(),
+ arm::app::asr::GetModelLen())) {
printf_err("Failed to initialise model\n");
return;
} else if (!VerifyTensorDimensions(model)) {
@@ -61,6 +75,14 @@ void main_loop()
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
std::vector <std::string> labels;
@@ -71,10 +93,10 @@ void main_loop()
caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("clipIndex", 0);
- caseContext.Set<uint32_t>("frameLength", g_FrameLength);
- caseContext.Set<uint32_t>("frameStride", g_FrameStride);
- caseContext.Set<float>("scoreThreshold", g_ScoreThreshold); /* Score threshold. */
- caseContext.Set<uint32_t>("ctxLen", g_ctxLen); /* Left and right context length (MFCC feat vectors). */
+ caseContext.Set<uint32_t>("frameLength", arm::app::asr::g_FrameLength);
+ caseContext.Set<uint32_t>("frameStride", arm::app::asr::g_FrameStride);
+ caseContext.Set<float>("scoreThreshold", arm::app::asr::g_ScoreThreshold); /* Score threshold. */
+ caseContext.Set<uint32_t>("ctxLen", arm::app::asr::g_ctxLen); /* Left and right context length (MFCC feat vectors). */
caseContext.Set<const std::vector <std::string>&>("labels", labels);
caseContext.Set<arm::app::AsrClassifier&>("classifier", classifier);
diff --git a/source/use_case/asr/src/OutputDecode.cc b/source/use_case/asr/src/OutputDecode.cc
deleted file mode 100644
index 41fbe07..0000000
--- a/source/use_case/asr/src/OutputDecode.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "OutputDecode.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-namespace asr {
-
- std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults)
- {
- std::string CleanOutputBuffer;
-
- for (size_t i = 0; i < vecResults.size(); ++i) /* For all elements in vector. */
- {
- while (i+1 < vecResults.size() &&
- vecResults[i].m_label == vecResults[i+1].m_label) /* While the current element is equal to the next, ignore it and move on. */
- {
- ++i;
- }
- if (vecResults[i].m_label != "$") /* $ is a character used to represent unknown and double characters so should not be in output. */
- {
- CleanOutputBuffer += vecResults[i].m_label; /* If the element is different to the next, it will be appended to CleanOutputBuffer. */
- }
- }
-
- return CleanOutputBuffer; /* Return string type containing clean output. */
- }
-
-} /* namespace asr */
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/asr/src/Wav2LetterMfcc.cc b/source/use_case/asr/src/Wav2LetterMfcc.cc
deleted file mode 100644
index bb29b0f..0000000
--- a/source/use_case/asr/src/Wav2LetterMfcc.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterMfcc.hpp"
-
-#include "PlatformMath.hpp"
-#include "log_macros.h"
-
-#include <cfloat>
-
-namespace arm {
-namespace app {
-namespace audio {
-
- bool Wav2LetterMFCC::ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies)
- {
- const size_t numBanks = melEnergies.size();
-
- if (numBanks != filterBankFilterFirst.size() ||
- numBanks != filterBankFilterLast.size()) {
- printf_err("Unexpected filter bank lengths\n");
- return false;
- }
-
- for (size_t bin = 0; bin < numBanks; ++bin) {
- auto filterBankIter = melFilterBank[bin].begin();
- auto end = melFilterBank[bin].end();
- /* Avoid log of zero at later stages, same value used in librosa.
- * The number was used during our default wav2letter model training. */
- float melEnergy = 1e-10;
- const uint32_t firstIndex = filterBankFilterFirst[bin];
- const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
-
- for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
- melEnergy += (*filterBankIter++ * fftVec[i]);
- }
-
- melEnergies[bin] = melEnergy;
- }
-
- return true;
- }
-
- void Wav2LetterMFCC::ConvertToLogarithmicScale(
- std::vector<float>& melEnergies)
- {
- float maxMelEnergy = -FLT_MAX;
-
- /* Container for natural logarithms of mel energies. */
- std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
-
- /* Because we are taking natural logs, we need to multiply by log10(e).
- * Also, for wav2letter model, we scale our log10 values by 10. */
- constexpr float multiplier = 10.0 * /* Default scalar. */
- 0.4342944819032518; /* log10f(std::exp(1.0)) */
-
- /* Take log of the whole vector. */
- math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
-
- /* Scale the log values and get the max. */
- for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
- iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
-
- *iterM = *iterL * multiplier;
-
- /* Save the max mel energy. */
- if (*iterM > maxMelEnergy) {
- maxMelEnergy = *iterM;
- }
- }
-
- /* Clamp the mel energies. */
- constexpr float maxDb = 80.0;
- const float clampLevelLowdB = maxMelEnergy - maxDb;
- for (float& melEnergy : melEnergies) {
- melEnergy = std::max(melEnergy, clampLevelLowdB);
- }
- }
-
- std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
- const int32_t inputLength,
- const int32_t coefficientCount)
- {
- std::vector<float> dctMatix(inputLength * coefficientCount);
-
- /* Orthonormal normalization. */
- const float normalizerK0 = 2 * math::MathUtils::SqrtF32(1.0f /
- static_cast<float>(4*inputLength));
- const float normalizer = 2 * math::MathUtils::SqrtF32(1.0f /
- static_cast<float>(2*inputLength));
-
- const float angleIncr = M_PI / inputLength;
- float angle = angleIncr; /* We start using it at k = 1 loop. */
-
- /* First row of DCT will use normalizer K0. */
- for (int32_t n = 0; n < inputLength; ++n) {
- dctMatix[n] = normalizerK0 /* cos(0) = 1 */;
- }
-
- /* Second row (index = 1) onwards, we use standard normalizer. */
- for (int32_t k = 1, m = inputLength; k < coefficientCount; ++k, m += inputLength) {
- for (int32_t n = 0; n < inputLength; ++n) {
- dctMatix[m+n] = normalizer *
- math::MathUtils::CosineF32((n + 0.5f) * angle);
- }
- angle += angleIncr;
- }
- return dctMatix;
- }
-
- float Wav2LetterMFCC::GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- const bool useHTKMethod)
- {
- /* Slaney normalization for mel weights. */
- return (2.0f / (MFCC::InverseMelScale(rightMel, useHTKMethod) -
- MFCC::InverseMelScale(leftMel, useHTKMethod)));
- }
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/asr/src/Wav2LetterModel.cc b/source/use_case/asr/src/Wav2LetterModel.cc
deleted file mode 100644
index 8b38f4f..0000000
--- a/source/use_case/asr/src/Wav2LetterModel.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterModel.hpp"
-
-#include "log_macros.h"
-
-
-const tflite::MicroOpResolver& arm::app::Wav2LetterModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::Wav2LetterModel::EnlistOperations()
-{
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddReshape();
- this->m_opResolver.AddLeakyRelu();
- this->m_opResolver.AddSoftmax();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
-
- return true;
-}
-
-extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::Wav2LetterModel::ModelPointer()
-{
- return GetModelPointer();
-}
-
-extern size_t GetModelLen();
-size_t arm::app::Wav2LetterModel::ModelSize()
-{
- return GetModelLen();
-} \ No newline at end of file
diff --git a/source/use_case/asr/src/Wav2LetterPostprocess.cc b/source/use_case/asr/src/Wav2LetterPostprocess.cc
deleted file mode 100644
index 42f434e..0000000
--- a/source/use_case/asr/src/Wav2LetterPostprocess.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterPostprocess.hpp"
-
-#include "Wav2LetterModel.hpp"
-#include "log_macros.h"
-
-#include <cmath>
-
-namespace arm {
-namespace app {
-
- AsrPostProcess::AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
- const std::vector<std::string>& labels, std::vector<ClassificationResult>& results,
- const uint32_t outputContextLen,
- const uint32_t blankTokenIdx, const uint32_t reductionAxisIdx
- ):
- m_classifier(classifier),
- m_outputTensor(outputTensor),
- m_labels{labels},
- m_results(results),
- m_outputContextLen(outputContextLen),
- m_countIterations(0),
- m_blankTokenIdx(blankTokenIdx),
- m_reductionAxisIdx(reductionAxisIdx)
- {
- this->m_outputInnerLen = AsrPostProcess::GetOutputInnerLen(this->m_outputTensor, this->m_outputContextLen);
- this->m_totalLen = (2 * this->m_outputContextLen + this->m_outputInnerLen);
- }
-
- bool AsrPostProcess::DoPostProcess()
- {
- /* Basic checks. */
- if (!this->IsInputValid(this->m_outputTensor, this->m_reductionAxisIdx)) {
- return false;
- }
-
- /* Irrespective of tensor type, we use unsigned "byte" */
- auto* ptrData = tflite::GetTensorData<uint8_t>(this->m_outputTensor);
- const uint32_t elemSz = AsrPostProcess::GetTensorElementSize(this->m_outputTensor);
-
- /* Other sanity checks. */
- if (0 == elemSz) {
- printf_err("Tensor type not supported for post processing\n");
- return false;
- } else if (elemSz * this->m_totalLen > this->m_outputTensor->bytes) {
- printf_err("Insufficient number of tensor bytes\n");
- return false;
- }
-
- /* Which axis do we need to process? */
- switch (this->m_reductionAxisIdx) {
- case Wav2LetterModel::ms_outputRowsIdx:
- this->EraseSectionsRowWise(
- ptrData, elemSz * this->m_outputTensor->dims->data[Wav2LetterModel::ms_outputColsIdx],
- this->m_lastIteration);
- break;
- default:
- printf_err("Unsupported axis index: %" PRIu32 "\n", this->m_reductionAxisIdx);
- return false;
- }
- this->m_classifier.GetClassificationResults(this->m_outputTensor,
- this->m_results, this->m_labels, 1);
-
- return true;
- }
-
- bool AsrPostProcess::IsInputValid(TfLiteTensor* tensor, const uint32_t axisIdx) const
- {
- if (nullptr == tensor) {
- return false;
- }
-
- if (static_cast<int>(axisIdx) >= tensor->dims->size) {
- printf_err("Invalid axis index: %" PRIu32 "; Max: %d\n",
- axisIdx, tensor->dims->size);
- return false;
- }
-
- if (static_cast<int>(this->m_totalLen) !=
- tensor->dims->data[axisIdx]) {
- printf_err("Unexpected tensor dimension for axis %d, got %d, \n",
- axisIdx, tensor->dims->data[axisIdx]);
- return false;
- }
-
- return true;
- }
-
- uint32_t AsrPostProcess::GetTensorElementSize(TfLiteTensor* tensor)
- {
- switch(tensor->type) {
- case kTfLiteUInt8:
- case kTfLiteInt8:
- return 1;
- case kTfLiteInt16:
- return 2;
- case kTfLiteInt32:
- case kTfLiteFloat32:
- return 4;
- default:
- printf_err("Unsupported tensor type %s\n",
- TfLiteTypeGetName(tensor->type));
- }
-
- return 0;
- }
-
- bool AsrPostProcess::EraseSectionsRowWise(
- uint8_t* ptrData,
- const uint32_t strideSzBytes,
- const bool lastIteration)
- {
- /* In this case, the "zero-ing" is quite simple as the region
- * to be zeroed sits in contiguous memory (row-major). */
- const uint32_t eraseLen = strideSzBytes * this->m_outputContextLen;
-
- /* Erase left context? */
- if (this->m_countIterations > 0) {
- /* Set output of each classification window to the blank token. */
- std::memset(ptrData, 0, eraseLen);
- for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
- ptrData[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
- }
- }
-
- /* Erase right context? */
- if (false == lastIteration) {
- uint8_t* rightCtxPtr = ptrData + (strideSzBytes * (this->m_outputContextLen + this->m_outputInnerLen));
- /* Set output of each classification window to the blank token. */
- std::memset(rightCtxPtr, 0, eraseLen);
- for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
- rightCtxPtr[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
- }
- }
-
- if (lastIteration) {
- this->m_countIterations = 0;
- } else {
- ++this->m_countIterations;
- }
-
- return true;
- }
-
- uint32_t AsrPostProcess::GetNumFeatureVectors(const Model& model)
- {
- TfLiteTensor* inputTensor = model.GetInputTensor(0);
- const int inputRows = std::max(inputTensor->dims->data[Wav2LetterModel::ms_inputRowsIdx], 0);
- if (inputRows == 0) {
- printf_err("Error getting number of input rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_inputRowsIdx);
- }
- return inputRows;
- }
-
- uint32_t AsrPostProcess::GetOutputInnerLen(const TfLiteTensor* outputTensor, const uint32_t outputCtxLen)
- {
- const uint32_t outputRows = std::max(outputTensor->dims->data[Wav2LetterModel::ms_outputRowsIdx], 0);
- if (outputRows == 0) {
- printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_outputRowsIdx);
- }
-
- /* Watching for underflow. */
- int innerLen = (outputRows - (2 * outputCtxLen));
-
- return std::max(innerLen, 0);
- }
-
- uint32_t AsrPostProcess::GetOutputContextLen(const Model& model, const uint32_t inputCtxLen)
- {
- const uint32_t inputRows = AsrPostProcess::GetNumFeatureVectors(model);
- const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
- constexpr uint32_t ms_outputRowsIdx = Wav2LetterModel::ms_outputRowsIdx;
-
- /* Check to make sure that the input tensor supports the above
- * context and inner lengths. */
- if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
- printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
- inputCtxLen);
- return 0;
- }
-
- TfLiteTensor* outputTensor = model.GetOutputTensor(0);
- const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
- if (outputRows == 0) {
- printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_outputRowsIdx);
- return 0;
- }
-
- const float inOutRowRatio = static_cast<float>(inputRows) /
- static_cast<float>(outputRows);
-
- return std::round(static_cast<float>(inputCtxLen) / inOutRowRatio);
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/asr/src/Wav2LetterPreprocess.cc b/source/use_case/asr/src/Wav2LetterPreprocess.cc
deleted file mode 100644
index 92b0631..0000000
--- a/source/use_case/asr/src/Wav2LetterPreprocess.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterPreprocess.hpp"
-
-#include "PlatformMath.hpp"
-#include "TensorFlowLiteMicro.hpp"
-
-#include <algorithm>
-#include <cmath>
-
-namespace arm {
-namespace app {
-
- /*
-  * Stores the input tensor and MFCC configuration, and creates the MFCC and
-  * delta buffers with dimensions (numMfccFeatures, numFeatureFrames).
-  */
- AsrPreProcess::AsrPreProcess(TfLiteTensor* inputTensor, const uint32_t numMfccFeatures,
- const uint32_t numFeatureFrames, const uint32_t mfccWindowLen,
- const uint32_t mfccWindowStride
- ):
- m_mfcc(numMfccFeatures, mfccWindowLen),
- m_inputTensor(inputTensor),
- m_mfccBuf(numMfccFeatures, numFeatureFrames),
- m_delta1Buf(numMfccFeatures, numFeatureFrames),
- m_delta2Buf(numMfccFeatures, numFeatureFrames),
- m_mfccWindowLen(mfccWindowLen),
- m_mfccWindowStride(mfccWindowStride),
- m_numMfccFeats(numMfccFeatures),
- m_numFeatureFrames(numFeatureFrames)
- {
- /* The MFCC calculator is only initialised for a non-zero feature count and window length. */
- if (numMfccFeatures > 0 && mfccWindowLen > 0) {
- this->m_mfcc.Init();
- }
- }
-
- /**
-  * @brief Computes MFCC features for the given audio, derives the first and
-  *        second order deltas, standardises all three feature sets and
-  *        quantises the result into the input tensor.
-  * @param[in] audioData    Pointer to the audio data, read as int16_t samples.
-  * @param[in] audioDataLen Length of the audio data as handed to the sliding
-  *                         window (presumably a sample count - confirm).
-  * @return true if the features were quantised into the tensor, false otherwise.
-  **/
- bool AsrPreProcess::DoPreProcess(const void* audioData, const size_t audioDataLen)
- {
-     this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(
-         static_cast<const int16_t*>(audioData), audioDataLen,
-         this->m_mfccWindowLen, this->m_mfccWindowStride);
-
-     uint32_t mfccBufIdx = 0;
-
-     std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
-     std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
-     std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
-
-     /* While we can slide over the audio. The frame count bound stops audio
-      * that yields more windows than expected from writing past the buffer. */
-     while (this->m_mfccSlidingWindow.HasNext() &&
-            mfccBufIdx < this->m_numFeatureFrames) {
-         const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
-         auto mfccAudioData = std::vector<int16_t>(
-             mfccWindow,
-             mfccWindow + this->m_mfccWindowLen);
-         auto mfcc = this->m_mfcc.MfccCompute(mfccAudioData);
-         for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i) {
-             this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
-         }
-         ++mfccBufIdx;
-     }
-
-     /* Pad MFCC if needed by adding MFCC for zeros. */
-     if (mfccBufIdx < this->m_numFeatureFrames) {
-         std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->m_mfccWindowLen, 0);
-         std::vector<float> mfccZeros = this->m_mfcc.MfccCompute(zerosWindow);
-
-         /* Written element by element, exactly like the loop above, so the
-          * padding does not depend on how the 2D buffer is laid out in memory. */
-         while (mfccBufIdx < this->m_numFeatureFrames) {
-             for (size_t i = 0; i < this->m_numMfccFeats; ++i) {
-                 this->m_mfccBuf(i, mfccBufIdx) = mfccZeros[i];
-             }
-             ++mfccBufIdx;
-         }
-     }
-
-     /* Compute first and second order deltas from MFCCs. */
-     if (!AsrPreProcess::ComputeDeltas(this->m_mfccBuf, this->m_delta1Buf, this->m_delta2Buf)) {
-         printf_err("Failed to compute MFCC deltas\n");
-         return false;
-     }
-
-     /* Standardize calculated features. */
-     this->Standarize();
-
-     /* Quantise. */
-     QuantParams quantParams = GetTensorQuantParams(this->m_inputTensor);
-
-     if (0 == quantParams.scale) {
-         printf_err("Quantisation scale can't be 0\n");
-         return false;
-     }
-
-     switch(this->m_inputTensor->type) {
-         case kTfLiteUInt8:
-             return this->Quantise<uint8_t>(
-                 tflite::GetTensorData<uint8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
-                 quantParams.scale, quantParams.offset);
-         case kTfLiteInt8:
-             return this->Quantise<int8_t>(
-                 tflite::GetTensorData<int8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
-                 quantParams.scale, quantParams.offset);
-         default:
-             printf_err("Unsupported tensor type %s\n",
-                        TfLiteTypeGetName(this->m_inputTensor->type));
-     }
-
-     return false;
- }
-
- /**
-  * @brief Computes the first and second order deltas of the MFCC features
-  *        along the time axis using fixed 9-tap differential kernels.
-  * @param[in]  mfcc   MFCC features, indexed as (feature, time).
-  * @param[out] delta1 Receives the first order deltas.
-  * @param[out] delta2 Receives the second order deltas.
-  * @return false if any of the checked dimensions is zero or the delta
-  *         buffers disagree on their first dimension, true otherwise.
-  **/
- bool AsrPreProcess::ComputeDeltas(Array2d<float>& mfcc,
-                                   Array2d<float>& delta1,
-                                   Array2d<float>& delta2)
- {
-     const std::vector <float> delta1Coeffs =
-         {6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
-          1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
-          -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
-
-     const std::vector <float> delta2Coeffs =
-         {0.06060606, 0.01515152, -0.01731602,
-          -0.03679654, -0.04329004, -0.03679654,
-          -0.01731602, 0.01515152, 0.06060606};
-
-     if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
-         mfcc.size(0) == 0 || mfcc.size(1) == 0) {
-         return false;
-     }
-
-     /* Get the middle index; coeff vec len should always be odd. */
-     const size_t coeffLen = delta1Coeffs.size();
-     const size_t fMidIdx = (coeffLen - 1)/2;
-     const size_t numFeatures = mfcc.size(0);
-     const size_t numFeatVectors = mfcc.size(1);
-
-     /* One past the last time index that has a full kernel window around it.
-      * Guarded because `numFeatVectors - fMidIdx` is unsigned and would wrap
-      * to a huge bound when there are fewer feature vectors than fMidIdx. */
-     const size_t endIdx = (numFeatVectors > fMidIdx) ? (numFeatVectors - fMidIdx) : 0;
-
-     /* Iterate through features in MFCC vector. */
-     for (size_t i = 0; i < numFeatures; ++i) {
-         /* For each feature, iterate through time (t) samples representing feature evolution and
-          * calculate d/dt and d^2/dt^2, using 1D convolution with differential kernels.
-          * Convolution padding = valid, result size is `time length - kernel length + 1`.
-          * The result is padded with 0 from both sides to match the size of initial time samples data.
-          *
-          * For the small filter, conv1D implementation as a simple loop is efficient enough.
-          * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
-          */
-
-         for (size_t j = fMidIdx; j < endIdx; ++j) {
-             float d1 = 0;
-             float d2 = 0;
-             const size_t mfccStIdx = j - fMidIdx;
-
-             /* The kernel is applied reversed (m runs down while k runs up). */
-             for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
-
-                 d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
-                 d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
-             }
-
-             delta1(i,j) = d1;
-             delta2(i,j) = d2;
-         }
-     }
-
-     return true;
- }
-
- /*
-  * Standardises the buffer in place: every element becomes
-  * (value / stddev) - (mean / stddev). A buffer with zero standard deviation
-  * is filled with zeros instead.
-  */
- void AsrPreProcess::StandardizeVecF32(Array2d<float>& vec)
- {
-     auto mean = math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
-     auto stddev = math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
-
-     debug("Mean: %f, Stddev: %f\n", mean, stddev);
-
-     /* No spread in the data: nothing to scale by, so zero the buffer. */
-     if (stddev == 0) {
-         std::fill(vec.begin(), vec.end(), 0);
-         return;
-     }
-
-     const float stddevInv = 1.f/stddev;
-     const float normalisedMean = mean/stddev;
-
-     for (auto it = vec.begin(); it != vec.end(); ++it) {
-         float& value = *it;
-         value = value * stddevInv - normalisedMean;
-     }
- }
-
- /* Standardises the MFCC buffer and both delta buffers, each one independently. */
- void AsrPreProcess::Standarize()
- {
- AsrPreProcess::StandardizeVecF32(this->m_mfccBuf);
- AsrPreProcess::StandardizeVecF32(this->m_delta1Buf);
- AsrPreProcess::StandardizeVecF32(this->m_delta2Buf);
- }
-
- /*
-  * Quantises one element: divides by the scale, adds the offset, rounds to the
-  * nearest integer value and limits the result to [minVal, maxVal].
-  */
- float AsrPreProcess::GetQuantElem(
-         const float elem,
-         const float quantScale,
-         const int quantOffset,
-         const float minVal,
-         const float maxVal)
- {
-     float quantised = std::round((elem/quantScale) + quantOffset);
-
-     /* Lower bound first, then upper bound. */
-     if (quantised < minVal) {
-         quantised = minVal;
-     }
-     if (maxVal < quantised) {
-         quantised = maxVal;
-     }
-     return quantised;
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/asr/usecase.cmake b/source/use_case/asr/usecase.cmake
index 50e7e26..2a2178b 100644
--- a/source/use_case/asr/usecase.cmake
+++ b/source/use_case/asr/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "asr")
USER_OPTION(${use_case}_FILE_PATH "Directory with custom WAV input files, or path to a single WAV file, to use in the evaluation application."
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
@@ -98,4 +100,4 @@ generate_tflite_code(
MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
DESTINATION ${SRC_GEN_DIR}
EXPRESSIONS ${EXTRA_MODEL_CODE}
- )
+ NAMESPACE "arm" "app" "asr")
diff --git a/source/use_case/img_class/include/ImgClassProcessing.hpp b/source/use_case/img_class/include/ImgClassProcessing.hpp
deleted file mode 100644
index e931b7d..0000000
--- a/source/use_case/img_class/include/ImgClassProcessing.hpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef IMG_CLASS_PROCESSING_HPP
-#define IMG_CLASS_PROCESSING_HPP
-
-#include "BaseProcessing.hpp"
-#include "Model.hpp"
-#include "Classifier.hpp"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for Image Classification use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class ImgClassPreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] convertToInt8 Should the image be converted to Int8 range.
- **/
- explicit ImgClassPreProcess(TfLiteTensor* inputTensor, bool convertToInt8);
-
- /**
- * @brief Should perform pre-processing of 'raw' input image data and load it into
- * TFLite Micro input tensors ready for inference
- * @param[in] input Pointer to the data that pre-processing will work on.
- * @param[in] inputSize Size of the input data.
- * @return true if successful, false otherwise.
- **/
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- private:
- TfLiteTensor* m_inputTensor; /* Input tensor that the image data is loaded into. */
- bool m_convertToInt8; /* Whether the loaded data is converted to Int8 range. */
- };
-
- /**
- * @brief Post-processing class for Image Classification use case.
- * Implements methods declared by BasePostProcess and anything else needed
- * to populate result vector.
- */
- class ImgClassPostProcess : public BasePostProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Classifier object used to get top N results from classification.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[in,out] results Vector of classification results to store decoded outputs.
- **/
- ImgClassPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate classification result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
-
- private:
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- Classifier& m_imgClassifier; /* Classifier used to decode the output tensor. */
- const std::vector<std::string>& m_labels; /* Labels; held by reference, not copied. */
- std::vector<ClassificationResult>& m_results; /* Caller-owned results vector; held by reference. */
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* IMG_CLASS_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/use_case/img_class/include/MobileNetModel.hpp b/source/use_case/img_class/include/MobileNetModel.hpp
deleted file mode 100644
index 503f1ac..0000000
--- a/source/use_case/img_class/include/MobileNetModel.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef IMG_CLASS_MOBILENETMODEL_HPP
-#define IMG_CLASS_MOBILENETMODEL_HPP
-
-#include "Model.hpp"
-
-namespace arm {
-namespace app {
-
- /* Model specialisation used by the image classification use case. */
- class MobileNetModel : public Model {
-
- public:
- /* Indices for the expected model - based on input tensor shape */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_inputChannelsIdx = 3;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- /** @brief Gets the pointer to the model data. */
- const uint8_t* ModelPointer() override;
-
- /** @brief Gets the size of the model data. */
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 7;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* IMG_CLASS_MOBILENETMODEL_HPP */ \ No newline at end of file
diff --git a/source/use_case/img_class/src/ImgClassProcessing.cc b/source/use_case/img_class/src/ImgClassProcessing.cc
deleted file mode 100644
index adf9794..0000000
--- a/source/use_case/img_class/src/ImgClassProcessing.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "ImgClassProcessing.hpp"
-#include "ImageUtils.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /* Stores the input tensor pointer and the Int8 conversion flag; no other work is done. */
- ImgClassPreProcess::ImgClassPreProcess(TfLiteTensor* inputTensor, bool convertToInt8)
- :m_inputTensor{inputTensor},
- m_convertToInt8{convertToInt8}
- {}
-
- /**
-  * @brief Copies the raw image bytes into the input tensor and, if requested
-  *        at construction, converts the tensor contents to Int8 range.
-  * @param[in] data      Pointer to the image data.
-  * @param[in] inputSize Number of bytes to copy; must not exceed the tensor size.
-  * @return true if the tensor was populated, false if the data pointer is
-  *         null or the data does not fit into the tensor.
-  **/
- bool ImgClassPreProcess::DoPreProcess(const void* data, size_t inputSize)
- {
-     if (data == nullptr) {
-         printf_err("Data pointer is null");
-         return false;
-     }
-
-     /* Refuse to copy more than the tensor can hold; memcpy would otherwise
-      * write past the end of the tensor buffer. */
-     if (inputSize > this->m_inputTensor->bytes) {
-         printf_err("Input data does not fit in the input tensor\n");
-         return false;
-     }
-
-     auto input = static_cast<const uint8_t*>(data);
-
-     std::memcpy(this->m_inputTensor->data.data, input, inputSize);
-     debug("Input tensor populated \n");
-
-     if (this->m_convertToInt8) {
-         image::ConvertImgToInt8(this->m_inputTensor->data.data, this->m_inputTensor->bytes);
-     }
-
-     return true;
- }
-
- /* Stores the output tensor pointer and binds the classifier, labels and results
-  * by reference; the referenced objects must outlive this instance. */
- ImgClassPostProcess::ImgClassPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results)
- :m_outputTensor{outputTensor},
- m_imgClassifier{classifier},
- m_labels{labels},
- m_results{results}
- {}
-
- /* Delegates to the classifier, passing the output tensor, results vector and
-  * labels, and returns the classifier's verdict unchanged.
-  * NOTE(review): the literals 5 and false are presumably the number of top
-  * results to keep and a softmax switch - confirm against the Classifier API. */
- bool ImgClassPostProcess::DoPostProcess()
- {
- return this->m_imgClassifier.GetClassificationResults(
- this->m_outputTensor, this->m_results,
- this->m_labels, 5, false);
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/img_class/src/MainLoop.cc b/source/use_case/img_class/src/MainLoop.cc
index d9fb925..de3779f 100644
--- a/source/use_case/img_class/src/MainLoop.cc
+++ b/source/use_case/img_class/src/MainLoop.cc
@@ -21,7 +21,16 @@
#include "MobileNetModel.hpp" /* Model class for running inference. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
-#include "log_macros.h"
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+ namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ } /* namespace app */
+} /* namespace arm */
+
+extern uint8_t* GetModelPointer();
+extern size_t GetModelLen();
using ImgClassClassifier = arm::app::Classifier;
@@ -30,11 +39,22 @@ void main_loop()
arm::app::MobileNetModel model; /* Model wrapper object. */
/* Load the model. */
- if (!model.Init()) {
+ if (!model.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ GetModelPointer(),
+ GetModelLen())) {
printf_err("Failed to initialise model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If this is not an NPU build, check whether the model contains an NPU operator. */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
diff --git a/source/use_case/img_class/src/MobileNetModel.cc b/source/use_case/img_class/src/MobileNetModel.cc
deleted file mode 100644
index 2e48f3b..0000000
--- a/source/use_case/img_class/src/MobileNetModel.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "MobileNetModel.hpp"
-#include "log_macros.h"
-
-/* Returns the mutable op resolver member through the resolver interface type. */
-const tflite::MicroOpResolver& arm::app::MobileNetModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-/**
- * @brief Registers the operators this model needs with the op resolver and,
- *        for NPU builds, the Ethos-U operator as well.
- * @return true if every operator was registered, false on the first failure.
- **/
-bool arm::app::MobileNetModel::EnlistOperations()
-{
-    /* Each Add*() call reports a status; a failure (for example a full
-     * resolver) must not go unnoticed. Registration stops at the first error. */
-    const bool opsAdded =
-        kTfLiteOk == this->m_opResolver.AddDepthwiseConv2D() &&
-        kTfLiteOk == this->m_opResolver.AddConv2D() &&
-        kTfLiteOk == this->m_opResolver.AddAveragePool2D() &&
-        kTfLiteOk == this->m_opResolver.AddAdd() &&
-        kTfLiteOk == this->m_opResolver.AddReshape() &&
-        kTfLiteOk == this->m_opResolver.AddSoftmax();
-
-    if (!opsAdded) {
-        printf_err("Failed to add operators to op resolver.\n");
-        return false;
-    }
-
-#if defined(ARM_NPU)
-    if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
-        info("Added %s support to op resolver\n",
-            tflite::GetString_ETHOSU());
-    } else {
-        printf_err("Failed to add Arm NPU support to op resolver.");
-        return false;
-    }
-#endif /* ARM_NPU */
-    return true;
-}
-
-/* Defined outside this file; returns the address of the model data. */
-extern uint8_t* GetModelPointer();
-/* Forwards to the externally defined GetModelPointer(). */
-const uint8_t* arm::app::MobileNetModel::ModelPointer()
-{
- return GetModelPointer();
-}
-
-/* Defined outside this file; returns the length of the model data. */
-extern size_t GetModelLen();
-/* Forwards to the externally defined GetModelLen(). */
-size_t arm::app::MobileNetModel::ModelSize()
-{
- return GetModelLen();
-}
diff --git a/source/use_case/img_class/usecase.cmake b/source/use_case/img_class/usecase.cmake
index dafdbbf..2a8be09 100644
--- a/source/use_case/img_class/usecase.cmake
+++ b/source/use_case/img_class/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "img_class")
USER_OPTION(${use_case}_FILE_PATH "Directory with custom image files to use, or path to a single image, in the evaluation application"
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
diff --git a/source/use_case/inference_runner/include/TestModel.hpp b/source/use_case/inference_runner/include/TestModel.hpp
deleted file mode 100644
index 0846bd4..0000000
--- a/source/use_case/inference_runner/include/TestModel.hpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef INF_RUNNER_TESTMODEL_HPP
-#define INF_RUNNER_TESTMODEL_HPP
-
-#include "Model.hpp"
-
-namespace arm {
-namespace app {
-
- /* Model specialisation used by the inference runner; it relies on
-  * AllOpsResolver instead of enlisting individual operators. */
- class TestModel : public Model {
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::AllOpsResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance, not needed as using AllOpsResolver. */
- /* NOTE(review): this returns false although nothing fails here - confirm that
-  * the base class does not treat the return value as an error. */
- bool EnlistOperations() override {return false;}
-
- /** @brief Gets the pointer to the model data. */
- const uint8_t* ModelPointer() override;
-
- /** @brief Gets the size of the model data. */
- size_t ModelSize() override;
-
- private:
-
- /* No need to define individual ops at the cost of extra memory. */
- tflite::AllOpsResolver m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* INF_RUNNER_TESTMODEL_HPP */ \ No newline at end of file
diff --git a/source/use_case/inference_runner/src/MainLoop.cc b/source/use_case/inference_runner/src/MainLoop.cc
index ddff40c..0991b7b 100644
--- a/source/use_case/inference_runner/src/MainLoop.cc
+++ b/source/use_case/inference_runner/src/MainLoop.cc
@@ -18,7 +18,37 @@
#include "TestModel.hpp" /* Model class for running inference. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+ namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ } /* namespace app */
+} /* namespace arm */
+
+#if defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE)
+
+/* Returns the fixed address DYNAMIC_MODEL_BASE as the model pointer and logs it.
+ * NOTE(review): the log format is %08x - confirm that it matches the type
+ * of DYNAMIC_MODEL_BASE on every target. */
+static uint8_t* GetModelPointer()
+{
+ info("Model pointer: 0x%08x\n", DYNAMIC_MODEL_BASE);
+ return reinterpret_cast<uint8_t *>(DYNAMIC_MODEL_BASE);
+}
+
+/* Returns DYNAMIC_MODEL_SIZE, i.e. the reserved space rather than the actual
+ * model size (see the TODO below). */
+static size_t GetModelLen()
+{
+ /* TODO: Can we get the actual model size here somehow?
+ * Currently we return the reserved space. It is possible to do
+ * so by reading the memory pattern but it will not be reliable. */
+ return static_cast<size_t>(DYNAMIC_MODEL_SIZE);
+}
+
+#else /* defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE) */
+
+extern uint8_t* GetModelPointer();
+extern size_t GetModelLen();
+
+#endif /* defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE) */
enum opcodes
{
@@ -31,11 +61,22 @@ void main_loop()
arm::app::TestModel model; /* Model wrapper object. */
/* Load the model. */
- if (!model.Init()) {
+ if (!model.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ GetModelPointer(),
+ GetModelLen())) {
printf_err("Failed to initialise model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If this is not an NPU build, check whether the model contains an NPU operator. */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
diff --git a/source/use_case/inference_runner/src/TestModel.cc b/source/use_case/inference_runner/src/TestModel.cc
deleted file mode 100644
index 3e72119..0000000
--- a/source/use_case/inference_runner/src/TestModel.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "TestModel.hpp"
-#include "log_macros.h"
-
-/* Returns the AllOpsResolver member held by this model. */
-const tflite::AllOpsResolver& arm::app::TestModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-#if defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE)
-
- /* Dynamic model variant: the model is read from the fixed address
-  * DYNAMIC_MODEL_BASE, which is also logged. */
- const uint8_t* arm::app::TestModel::ModelPointer()
- {
- info("Model pointer: 0x%08x\n", DYNAMIC_MODEL_BASE);
- return reinterpret_cast<uint8_t *>(DYNAMIC_MODEL_BASE);
- }
-
- /* Dynamic model variant: reports DYNAMIC_MODEL_SIZE, i.e. the reserved space
-  * rather than the actual model size (see the TODO below). */
- size_t arm::app::TestModel::ModelSize()
- {
- /* TODO: Can we get the actual model size here somehow?
- * Currently we return the reserved space. It is possible to do
- * so by reading the memory pattern but it will not be reliable. */
- return static_cast<size_t>(DYNAMIC_MODEL_SIZE);
- }
-
-#else /* defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE) */
-
- /* Defined outside this file; returns the address of the model data. */
- extern uint8_t* GetModelPointer();
- /* Forwards to the externally defined GetModelPointer(). */
- const uint8_t* arm::app::TestModel::ModelPointer()
- {
- return GetModelPointer();
- }
-
- /* Defined outside this file; returns the length of the model data. */
- extern size_t GetModelLen();
- /* Forwards to the externally defined GetModelLen(). */
- size_t arm::app::TestModel::ModelSize()
- {
- return GetModelLen();
- }
-
-#endif /* defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE) */
diff --git a/source/use_case/inference_runner/usecase.cmake b/source/use_case/inference_runner/usecase.cmake
index 7d12120..c70be71 100644
--- a/source/use_case/inference_runner/usecase.cmake
+++ b/source/use_case/inference_runner/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "inference_runner")
USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
0x00200000
diff --git a/source/use_case/kws/include/KwsProcessing.hpp b/source/use_case/kws/include/KwsProcessing.hpp
deleted file mode 100644
index d3de3b3..0000000
--- a/source/use_case/kws/include/KwsProcessing.hpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_PROCESSING_HPP
-#define KWS_PROCESSING_HPP
-
-#include <AudioUtils.hpp>
-#include "BaseProcessing.hpp"
-#include "Model.hpp"
-#include "Classifier.hpp"
-#include "MicroNetKwsMfcc.hpp"
-
-#include <functional>
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for Keyword Spotting use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class KwsPreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] numFeatures How many MFCC features to use.
- * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
- * for an inference.
- * @param[in] mfccFrameLength Number of audio samples used to calculate one set of MFCC values when
- * sliding a window through the audio sample.
- * @param[in] mfccFrameStride Number of audio samples between consecutive windows.
- **/
- explicit KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numFeatureFrames,
- int mfccFrameLength, int mfccFrameStride);
-
- /**
- * @brief Should perform pre-processing of 'raw' input audio data and load it into
- * TFLite Micro input tensors ready for inference.
- * @param[in] input Pointer to the data that pre-processing will work on.
- * @param[in] inputSize Size of the input data.
- * @return true if successful, false otherwise.
- **/
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- /* Public data members: readable and writable by users of this class. */
- size_t m_audioWindowIndex = 0; /* Index of audio slider, used when caching features in longer clips. */
- size_t m_audioDataWindowSize; /* Amount of audio needed for 1 inference. */
- size_t m_audioDataStride; /* Amount of audio to stride across if doing >1 inference in longer clips. */
-
- private:
- TfLiteTensor* m_inputTensor; /* Model input tensor. */
- const int m_mfccFrameLength; /* Audio samples per MFCC window. */
- const int m_mfccFrameStride; /* Audio samples between consecutive MFCC windows. */
- const size_t m_numMfccFrames; /* How many MFCC feature vectors (frames) are needed. */
-
- audio::MicroNetKwsMFCC m_mfcc;
- audio::SlidingWindow<const int16_t> m_mfccSlidingWindow;
- size_t m_numMfccVectorsInAudioStride;
- size_t m_numReusedMfccVectors;
- std::function<void (std::vector<int16_t>&, int, bool, size_t)> m_mfccFeatureCalculator;
-
- /**
- * @brief Returns a function to perform feature calculation and populates input tensor data with
- * MFCC data.
- *
- * Input tensor data type check is performed to choose correct MFCC feature data type.
- * If tensor has an integer data type then original features are quantised.
- *
- * Warning: MFCC calculator provided as input must have the same life scope as returned function.
- *
- * @param[in] mfcc MFCC feature calculator.
- * @param[in,out] inputTensor Input tensor pointer to store calculated features.
- * @param[in] cacheSize Size of the feature vectors cache (number of feature vectors).
- * @return Function to be called providing audio sample and sliding window index.
- */
- std::function<void (std::vector<int16_t>&, int, bool, size_t)>
- GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc,
- TfLiteTensor* inputTensor,
- size_t cacheSize);
-
- /* NOTE(review): the function type returned here takes size_t as its second
-  * parameter, while GetFeatureCalculator and m_mfccFeatureCalculator use int -
-  * confirm that the difference is intended. */
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
- FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute);
- };
-
- /**
- * @brief Post-processing class for Keyword Spotting use case.
- * Implements methods declared by BasePostProcess and anything else needed
- * to populate result vector.
- */
- class KwsPostProcess : public BasePostProcess {
-
- private:
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- Classifier& m_kwsClassifier; /* KWS Classifier object. */
- const std::vector<std::string>& m_labels; /* KWS Labels. */
- std::vector<ClassificationResult>& m_results; /* Results vector for a single inference. */
-
- public:
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Classifier object used to get top N results from classification.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[in,out] results Vector of classification results to store decoded outputs.
- **/
- KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate KWS result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws/include/KwsResult.hpp b/source/use_case/kws/include/KwsResult.hpp
deleted file mode 100644
index 38f32b4..0000000
--- a/source/use_case/kws/include/KwsResult.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_RESULT_HPP
-#define KWS_RESULT_HPP
-
-#include "ClassificationResult.hpp"
-
-#include <vector>
-
-namespace arm {
-namespace app {
-namespace kws {
-
- using ResultVec = std::vector<arm::app::ClassificationResult>;
-
- /* Structure for holding kws result. */
- class KwsResult {
-
- public:
- ResultVec m_resultVec; /* Container for "thresholded" classification results. */
- float m_timeStamp; /* Audio timestamp for this result. */
- uint32_t m_inferenceNumber; /* Corresponding inference number. */
- float m_threshold; /* Threshold value for `m_resultVec`. */
-
- KwsResult() = delete;
- KwsResult(ResultVec& resultVec,
- const float timestamp,
- const uint32_t inferenceIdx,
- const float scoreThreshold) {
-
- this->m_threshold = scoreThreshold;
- this->m_timeStamp = timestamp;
- this->m_inferenceNumber = inferenceIdx;
-
- this->m_resultVec = ResultVec();
- for (auto & i : resultVec) {
- if (i.m_normalisedVal >= this->m_threshold) {
- this->m_resultVec.emplace_back(i);
- }
- }
- }
- ~KwsResult() = default;
- };
-
-} /* namespace kws */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws/include/MicroNetKwsMfcc.hpp b/source/use_case/kws/include/MicroNetKwsMfcc.hpp
deleted file mode 100644
index b2565a3..0000000
--- a/source/use_case/kws/include/MicroNetKwsMfcc.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_MICRONET_MFCC_HPP
-#define KWS_MICRONET_MFCC_HPP
-
-#include "Mfcc.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Class to provide MicroNet specific MFCC calculation requirements. */
- class MicroNetKwsMFCC : public MFCC {
-
- public:
- static constexpr uint32_t ms_defaultSamplingFreq = 16000;
- static constexpr uint32_t ms_defaultNumFbankBins = 40;
- static constexpr uint32_t ms_defaultMelLoFreq = 20;
- static constexpr uint32_t ms_defaultMelHiFreq = 4000;
- static constexpr bool ms_defaultUseHtkMethod = true;
-
- explicit MicroNetKwsMFCC(const size_t numFeats, const size_t frameLen)
- : MFCC(MfccParams(
- ms_defaultSamplingFreq, ms_defaultNumFbankBins,
- ms_defaultMelLoFreq, ms_defaultMelHiFreq,
- numFeats, frameLen, ms_defaultUseHtkMethod))
- {}
- MicroNetKwsMFCC() = delete;
- ~MicroNetKwsMFCC() = default;
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_MICRONET_MFCC_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws/include/MicroNetKwsModel.hpp b/source/use_case/kws/include/MicroNetKwsModel.hpp
deleted file mode 100644
index 3259c45..0000000
--- a/source/use_case/kws/include/MicroNetKwsModel.hpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_MICRONETMODEL_HPP
-#define KWS_MICRONETMODEL_HPP
-
-#include "Model.hpp"
-
-extern const int g_FrameLength;
-extern const int g_FrameStride;
-extern const float g_ScoreThreshold;
-
-namespace arm {
-namespace app {
-
- class MicroNetKwsModel : public Model {
- public:
- /* Indices for the expected model - based on input and output tensor shapes */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_outputRowsIdx = 2;
- static constexpr uint32_t ms_outputColsIdx = 3;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 7;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_MICRONETMODEL_HPP */
diff --git a/source/use_case/kws/src/KwsProcessing.cc b/source/use_case/kws/src/KwsProcessing.cc
deleted file mode 100644
index 328709d..0000000
--- a/source/use_case/kws/src/KwsProcessing.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "KwsProcessing.hpp"
-#include "ImageUtils.hpp"
-#include "log_macros.h"
-#include "MicroNetKwsModel.hpp"
-
-namespace arm {
-namespace app {
-
- KwsPreProcess::KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numMfccFrames,
- int mfccFrameLength, int mfccFrameStride
- ):
- m_inputTensor{inputTensor},
- m_mfccFrameLength{mfccFrameLength},
- m_mfccFrameStride{mfccFrameStride},
- m_numMfccFrames{numMfccFrames},
- m_mfcc{audio::MicroNetKwsMFCC(numFeatures, mfccFrameLength)}
- {
- this->m_mfcc.Init();
-
- /* Deduce the data length required for 1 inference from the network parameters. */
- this->m_audioDataWindowSize = this->m_numMfccFrames * this->m_mfccFrameStride +
- (this->m_mfccFrameLength - this->m_mfccFrameStride);
-
- /* Creating an MFCC feature sliding window for the data required for 1 inference. */
- this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(nullptr, this->m_audioDataWindowSize,
- this->m_mfccFrameLength, this->m_mfccFrameStride);
-
- /* For longer audio clips we choose to move by half the audio window size
- * => for a 1 second window size there is an overlap of 0.5 seconds. */
- this->m_audioDataStride = this->m_audioDataWindowSize / 2;
-
- /* To have the previously calculated features re-usable, stride must be multiple
- * of MFCC features window stride. Reduce stride through audio if needed. */
- if (0 != this->m_audioDataStride % this->m_mfccFrameStride) {
- this->m_audioDataStride -= this->m_audioDataStride % this->m_mfccFrameStride;
- }
-
- this->m_numMfccVectorsInAudioStride = this->m_audioDataStride / this->m_mfccFrameStride;
-
- /* Calculate number of the feature vectors in the window overlap region.
- * These feature vectors will be reused.*/
- this->m_numReusedMfccVectors = this->m_mfccSlidingWindow.TotalStrides() + 1
- - this->m_numMfccVectorsInAudioStride;
-
- /* Construct feature calculation function. */
- this->m_mfccFeatureCalculator = GetFeatureCalculator(this->m_mfcc, this->m_inputTensor,
- this->m_numReusedMfccVectors);
-
- if (!this->m_mfccFeatureCalculator) {
- printf_err("Feature calculator not initialized.");
- }
- }
-
- bool KwsPreProcess::DoPreProcess(const void* data, size_t inputSize)
- {
- UNUSED(inputSize);
- if (data == nullptr) {
- printf_err("Data pointer is null");
- }
-
- /* Set the features sliding window to the new address. */
- auto input = static_cast<const int16_t*>(data);
- this->m_mfccSlidingWindow.Reset(input);
-
- /* Cache is only usable if we have more than 1 inference in an audio clip. */
- bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedMfccVectors > 0;
-
- /* Use a sliding window to calculate MFCC features frame by frame. */
- while (this->m_mfccSlidingWindow.HasNext()) {
- const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
-
- std::vector<int16_t> mfccFrameAudioData = std::vector<int16_t>(mfccWindow,
- mfccWindow + this->m_mfccFrameLength);
-
- /* Compute features for this window and write them to input tensor. */
- this->m_mfccFeatureCalculator(mfccFrameAudioData, this->m_mfccSlidingWindow.Index(),
- useCache, this->m_numMfccVectorsInAudioStride);
- }
-
- debug("Input tensor populated \n");
-
- return true;
- }
-
- /**
- * @brief Generic feature calculator factory.
- *
- * Returns lambda function to compute features using features cache.
- * Real features math is done by a lambda function provided as a parameter.
- * Features are written to input tensor memory.
- *
- * @tparam T Feature vector type.
- * @param[in] inputTensor Model input tensor pointer.
- * @param[in] cacheSize Number of feature vectors to cache. Defined by the sliding window overlap.
- * @param[in] compute Features calculator function.
- * @return Lambda function to compute features.
- */
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
- KwsPreProcess::FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute)
- {
- /* Feature cache to be captured by lambda function. */
- static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
-
- return [=](std::vector<int16_t>& audioDataWindow,
- size_t index,
- bool useCache,
- size_t featuresOverlapIndex)
- {
- T* tensorData = tflite::GetTensorData<T>(inputTensor);
- std::vector<T> features;
-
- /* Reuse features from cache if cache is ready and sliding windows overlap.
- * Overlap is in the beginning of sliding window with a size of a feature cache. */
- if (useCache && index < featureCache.size()) {
- features = std::move(featureCache[index]);
- } else {
- features = std::move(compute(audioDataWindow));
- }
- auto size = features.size();
- auto sizeBytes = sizeof(T) * size;
- std::memcpy(tensorData + (index * size), features.data(), sizeBytes);
-
- /* Start renewing cache as soon iteration goes out of the windows overlap. */
- if (index >= featuresOverlapIndex) {
- featureCache[index - featuresOverlapIndex] = std::move(features);
- }
- };
- }
-
- template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
- KwsPreProcess::FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
-
- template std::function<void(std::vector<int16_t>&, size_t, bool, size_t)>
- KwsPreProcess::FeatureCalc<float>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<float>(std::vector<int16_t>&)> compute);
-
-
- std::function<void (std::vector<int16_t>&, int, bool, size_t)>
- KwsPreProcess::GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
- {
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
-
- TfLiteQuantization quant = inputTensor->quantization;
-
- if (kTfLiteAffineQuantization == quant.type) {
- auto *quantParams = (TfLiteAffineQuantization *) quant.params;
- const float quantScale = quantParams->scale->data[0];
- const int quantOffset = quantParams->zero_point->data[0];
-
- switch (inputTensor->type) {
- case kTfLiteInt8: {
- mfccFeatureCalc = this->FeatureCalc<int8_t>(inputTensor,
- cacheSize,
- [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
- return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
- quantScale,
- quantOffset);
- }
- );
- break;
- }
- default:
- printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
- }
- } else {
- mfccFeatureCalc = this->FeatureCalc<float>(inputTensor, cacheSize,
- [&mfcc](std::vector<int16_t>& audioDataWindow) {
- return mfcc.MfccCompute(audioDataWindow); }
- );
- }
- return mfccFeatureCalc;
- }
-
- KwsPostProcess::KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results)
- :m_outputTensor{outputTensor},
- m_kwsClassifier{classifier},
- m_labels{labels},
- m_results{results}
- {}
-
- bool KwsPostProcess::DoPostProcess()
- {
- return this->m_kwsClassifier.GetClassificationResults(
- this->m_outputTensor, this->m_results,
- this->m_labels, 1, true);
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws/src/MainLoop.cc b/source/use_case/kws/src/MainLoop.cc
index e590c4a..3c35a7f 100644
--- a/source/use_case/kws/src/MainLoop.cc
+++ b/source/use_case/kws/src/MainLoop.cc
@@ -21,7 +21,18 @@
#include "Labels.hpp" /* For label strings. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+namespace app {
+namespace kws {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ extern uint8_t *GetModelPointer();
+ extern size_t GetModelLen();
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
using KwsClassifier = arm::app::Classifier;
@@ -53,11 +64,22 @@ void main_loop()
arm::app::MicroNetKwsModel model; /* Model wrapper object. */
/* Load the model. */
- if (!model.Init()) {
+ if (!model.Init(arm::app::kws::tensorArena,
+ sizeof(arm::app::kws::tensorArena),
+ arm::app::kws::GetModelPointer(),
+ arm::app::kws::GetModelLen())) {
printf_err("Failed to initialise model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If this is not an NPU build, check whether the model contains an NPU operator. */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
@@ -65,9 +87,9 @@ void main_loop()
caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("clipIndex", 0);
- caseContext.Set<int>("frameLength", g_FrameLength);
- caseContext.Set<int>("frameStride", g_FrameStride);
- caseContext.Set<float>("scoreThreshold", g_ScoreThreshold); /* Normalised score threshold. */
+ caseContext.Set<int>("frameLength", arm::app::kws::g_FrameLength);
+ caseContext.Set<int>("frameStride", arm::app::kws::g_FrameStride);
+ caseContext.Set<float>("scoreThreshold", arm::app::kws::g_ScoreThreshold); /* Normalised score threshold. */
KwsClassifier classifier; /* classifier wrapper object. */
caseContext.Set<arm::app::Classifier&>("classifier", classifier);
@@ -114,4 +136,4 @@ void main_loop()
}
} while (executionSuccessful && bUseMenu);
info("Main loop terminated.\n");
-} \ No newline at end of file
+}
diff --git a/source/use_case/kws/src/MicroNetKwsModel.cc b/source/use_case/kws/src/MicroNetKwsModel.cc
deleted file mode 100644
index 1c38525..0000000
--- a/source/use_case/kws/src/MicroNetKwsModel.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "MicroNetKwsModel.hpp"
-#include "log_macros.h"
-
-const tflite::MicroOpResolver& arm::app::MicroNetKwsModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::MicroNetKwsModel::EnlistOperations()
-{
- this->m_opResolver.AddReshape();
- this->m_opResolver.AddAveragePool2D();
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddDepthwiseConv2D();
- this->m_opResolver.AddFullyConnected();
- this->m_opResolver.AddRelu();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::MicroNetKwsModel::ModelPointer()
-{
- return GetModelPointer();
-}
-
-extern size_t GetModelLen();
-size_t arm::app::MicroNetKwsModel::ModelSize()
-{
- return GetModelLen();
-} \ No newline at end of file
diff --git a/source/use_case/kws/usecase.cmake b/source/use_case/kws/usecase.cmake
index 9f3736e..d9985c7 100644
--- a/source/use_case/kws/usecase.cmake
+++ b/source/use_case/kws/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "kws")
USER_OPTION(${use_case}_FILE_PATH "Directory with custom WAV input files, or path to a single WAV file, to use in the evaluation application."
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
@@ -96,4 +98,5 @@ generate_tflite_code(
MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
DESTINATION ${SRC_GEN_DIR}
EXPRESSIONS ${EXTRA_MODEL_CODE}
+ NAMESPACE "arm" "app" "kws"
)
diff --git a/source/use_case/kws_asr/include/AsrClassifier.hpp b/source/use_case/kws_asr/include/AsrClassifier.hpp
deleted file mode 100644
index 6ab9685..0000000
--- a/source/use_case/kws_asr/include/AsrClassifier.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_CLASSIFIER_HPP
-#define ASR_CLASSIFIER_HPP
-
-#include "Classifier.hpp"
-
-namespace arm {
-namespace app {
-
- class AsrClassifier : public Classifier {
- public:
- /**
- * @brief Gets the top N classification results from the
- * output vector.
- * @param[in] outputTensor Inference output tensor from an NN model.
- * @param[out] vecResults A vector of classification results
- * populated by this function.
- * @param[in] labels Labels vector to match classified classes
- * @param[in] topNCount Number of top classifications to pick.
- * @param[in] use_softmax Whether softmax scaling should be applied to model output.
- * @return true if successful, false otherwise.
- **/
- bool GetClassificationResults(
- TfLiteTensor* outputTensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, uint32_t topNCount,
- bool use_softmax = false) override;
-
- private:
-
- /**
- * @brief Utility function that gets the top 1 classification results from the
- * output tensor (vector of vector).
- * @param[in] tensor Inference output tensor from an NN model.
- * @param[out] vecResults A vector of classification results
- * populated by this function.
- * @param[in] labels Labels vector to match classified classes.
- * @param[in] scale Quantization scale.
- * @param[in] zeroPoint Quantization zero point.
- * @return true if successful, false otherwise.
- **/
- template<typename T>
- bool GetTopResults(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint);
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_CLASSIFIER_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/AsrResult.hpp b/source/use_case/kws_asr/include/AsrResult.hpp
deleted file mode 100644
index 25fa9e8..0000000
--- a/source/use_case/kws_asr/include/AsrResult.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ASR_RESULT_HPP
-#define ASR_RESULT_HPP
-
-#include "ClassificationResult.hpp"
-
-#include <vector>
-
-namespace arm {
-namespace app {
-namespace asr {
-
- using ResultVec = std::vector<arm::app::ClassificationResult>;
-
- /* Structure for holding asr result. */
- class AsrResult {
-
- public:
- ResultVec m_resultVec; /* Container for "thresholded" classification results. */
- float m_timeStamp; /* Audio timestamp for this result. */
- uint32_t m_inferenceNumber; /* Corresponding inference number. */
- float m_threshold; /* Threshold value for `m_resultVec` */
-
- AsrResult() = delete;
- AsrResult(ResultVec& resultVec,
- const float timestamp,
- const uint32_t inferenceIdx,
- const float scoreThreshold) {
-
- this->m_threshold = scoreThreshold;
- this->m_timeStamp = timestamp;
- this->m_inferenceNumber = inferenceIdx;
-
- this->m_resultVec = ResultVec();
- for (auto& i : resultVec) {
- if (i.m_normalisedVal >= this->m_threshold) {
- this->m_resultVec.emplace_back(i);
- }
- }
- }
- ~AsrResult() = default;
- };
-
-} /* namespace asr */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ASR_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/KwsProcessing.hpp b/source/use_case/kws_asr/include/KwsProcessing.hpp
deleted file mode 100644
index d3de3b3..0000000
--- a/source/use_case/kws_asr/include/KwsProcessing.hpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_PROCESSING_HPP
-#define KWS_PROCESSING_HPP
-
-#include <AudioUtils.hpp>
-#include "BaseProcessing.hpp"
-#include "Model.hpp"
-#include "Classifier.hpp"
-#include "MicroNetKwsMfcc.hpp"
-
-#include <functional>
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for Keyword Spotting use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class KwsPreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] numFeatures How many MFCC features to use.
- * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
- * for an inference.
- * @param[in] mfccFrameLength Number of audio samples used to calculate one set of MFCC values when
- * sliding a window through the audio sample.
- * @param[in] mfccFrameStride Number of audio samples between consecutive windows.
- **/
- explicit KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numFeatureFrames,
- int mfccFrameLength, int mfccFrameStride);
-
- /**
- * @brief Should perform pre-processing of 'raw' input audio data and load it into
- * TFLite Micro input tensors ready for inference.
- * @param[in] input Pointer to the data that pre-processing will work on.
- * @param[in] inputSize Size of the input data.
- * @return true if successful, false otherwise.
- **/
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- size_t m_audioWindowIndex = 0; /* Index of audio slider, used when caching features in longer clips. */
- size_t m_audioDataWindowSize; /* Amount of audio needed for 1 inference. */
- size_t m_audioDataStride; /* Amount of audio to stride across if doing >1 inference in longer clips. */
-
- private:
- TfLiteTensor* m_inputTensor; /* Model input tensor. */
- const int m_mfccFrameLength;
- const int m_mfccFrameStride;
- const size_t m_numMfccFrames; /* How many sets of m_numMfccFeats. */
-
- audio::MicroNetKwsMFCC m_mfcc;
- audio::SlidingWindow<const int16_t> m_mfccSlidingWindow;
- size_t m_numMfccVectorsInAudioStride;
- size_t m_numReusedMfccVectors;
- std::function<void (std::vector<int16_t>&, int, bool, size_t)> m_mfccFeatureCalculator;
-
- /**
- * @brief Returns a function to perform feature calculation and populates input tensor data with
- * MFCC data.
- *
- * Input tensor data type check is performed to choose correct MFCC feature data type.
- * If tensor has an integer data type then original features are quantised.
- *
- * Warning: MFCC calculator provided as input must have the same life scope as returned function.
- *
- * @param[in] mfcc MFCC feature calculator.
- * @param[in,out] inputTensor Input tensor pointer to store calculated features.
- * @param[in] cacheSize Size of the feature vectors cache (number of feature vectors).
- * @return Function to be called providing audio sample and sliding window index.
- */
- std::function<void (std::vector<int16_t>&, int, bool, size_t)>
- GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc,
- TfLiteTensor* inputTensor,
- size_t cacheSize);
-
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
- FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute);
- };
-
- /**
- * @brief Post-processing class for Keyword Spotting use case.
- * Implements methods declared by BasePostProcess and anything else needed
- * to populate result vector.
- */
- class KwsPostProcess : public BasePostProcess {
-
- private:
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- Classifier& m_kwsClassifier; /* KWS Classifier object. */
- const std::vector<std::string>& m_labels; /* KWS Labels. */
- std::vector<ClassificationResult>& m_results; /* Results vector for a single inference. */
-
- public:
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Classifier object used to get top N results from classification.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[in/out] results Vector of classification results to store decoded outputs.
- **/
- KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate KWS result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/KwsResult.hpp b/source/use_case/kws_asr/include/KwsResult.hpp
deleted file mode 100644
index 45bb790..0000000
--- a/source/use_case/kws_asr/include/KwsResult.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_RESULT_HPP
-#define KWS_RESULT_HPP
-
-#include "ClassificationResult.hpp"
-
-#include <vector>
-
-namespace arm {
-namespace app {
-namespace kws {
-
- using ResultVec = std::vector < arm::app::ClassificationResult >;
-
- /* Structure for holding kws result. */
- class KwsResult {
-
- public:
- ResultVec m_resultVec; /* Container for "thresholded" classification results. */
- float m_timeStamp; /* Audio timestamp for this result. */
- uint32_t m_inferenceNumber; /* Corresponding inference number. */
- float m_threshold; /* Threshold value for `m_resultVec.` */
-
- KwsResult() = delete;
- KwsResult(ResultVec& resultVec,
- const float timestamp,
- const uint32_t inferenceIdx,
- const float scoreThreshold) {
-
- this->m_threshold = scoreThreshold;
- this->m_timeStamp = timestamp;
- this->m_inferenceNumber = inferenceIdx;
-
- this->m_resultVec = ResultVec();
- for (auto & i : resultVec) {
- if (i.m_normalisedVal >= this->m_threshold) {
- this->m_resultVec.emplace_back(i);
- }
- }
- }
- ~KwsResult() = default;
- };
-
-} /* namespace kws */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_RESULT_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp b/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp
deleted file mode 100644
index af6ba5f..0000000
--- a/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_MICRONET_MFCC_HPP
-#define KWS_ASR_MICRONET_MFCC_HPP
-
-#include "Mfcc.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Class to provide MicroNet specific MFCC calculation requirements. */
- class MicroNetKwsMFCC : public MFCC {
-
- public:
- static constexpr uint32_t ms_defaultSamplingFreq = 16000;
- static constexpr uint32_t ms_defaultNumFbankBins = 40;
- static constexpr uint32_t ms_defaultMelLoFreq = 20;
- static constexpr uint32_t ms_defaultMelHiFreq = 4000;
- static constexpr bool ms_defaultUseHtkMethod = true;
-
-
- explicit MicroNetKwsMFCC(const size_t numFeats, const size_t frameLen)
- : MFCC(MfccParams(
- ms_defaultSamplingFreq, ms_defaultNumFbankBins,
- ms_defaultMelLoFreq, ms_defaultMelHiFreq,
- numFeats, frameLen, ms_defaultUseHtkMethod))
- {}
- MicroNetKwsMFCC() = delete;
- ~MicroNetKwsMFCC() = default;
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_MICRONET_MFCC_HPP */
diff --git a/source/use_case/kws_asr/include/MicroNetKwsModel.hpp b/source/use_case/kws_asr/include/MicroNetKwsModel.hpp
deleted file mode 100644
index 22cf916..0000000
--- a/source/use_case/kws_asr/include/MicroNetKwsModel.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_MICRONETMODEL_HPP
-#define KWS_ASR_MICRONETMODEL_HPP
-
-#include "Model.hpp"
-
-namespace arm {
-namespace app {
-namespace kws {
- extern const int g_FrameLength;
- extern const int g_FrameStride;
- extern const float g_ScoreThreshold;
- extern const uint32_t g_NumMfcc;
- extern const uint32_t g_NumAudioWins;
-} /* namespace kws */
-} /* namespace app */
-} /* namespace arm */
-
-namespace arm {
-namespace app {
- class MicroNetKwsModel : public Model {
- public:
- /* Indices for the expected model - based on input and output tensor shapes */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_outputRowsIdx = 2;
- static constexpr uint32_t ms_outputColsIdx = 3;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 7;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_MICRONETMODEL_HPP */
diff --git a/source/use_case/kws_asr/include/OutputDecode.hpp b/source/use_case/kws_asr/include/OutputDecode.hpp
deleted file mode 100644
index cea2c33..0000000
--- a/source/use_case/kws_asr/include/OutputDecode.hpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_OUTPUT_DECODE_HPP
-#define KWS_ASR_OUTPUT_DECODE_HPP
-
-#include "AsrClassifier.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-namespace asr {
-
- /**
- * @brief Gets the top N classification results from the
- * output vector.
- * @param[in] vecResults Label output from classifier.
- * @return true if successful, false otherwise.
- **/
- std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults);
-
-} /* namespace asr */
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_OUTPUT_DECODE_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp b/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp
deleted file mode 100644
index 75d75da..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterMfcc.hpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LET_MFCC_HPP
-#define KWS_ASR_WAV2LET_MFCC_HPP
-
-#include "Mfcc.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-
- /* Class to provide Wav2Letter specific MFCC calculation requirements. */
- class Wav2LetterMFCC : public MFCC {
-
- public:
- static constexpr uint32_t ms_defaultSamplingFreq = 16000;
- static constexpr uint32_t ms_defaultNumFbankBins = 128;
- static constexpr uint32_t ms_defaultMelLoFreq = 0;
- static constexpr uint32_t ms_defaultMelHiFreq = 8000;
- static constexpr bool ms_defaultUseHtkMethod = false;
-
- explicit Wav2LetterMFCC(const size_t numFeats, const size_t frameLen)
- : MFCC(MfccParams(
- ms_defaultSamplingFreq, ms_defaultNumFbankBins,
- ms_defaultMelLoFreq, ms_defaultMelHiFreq,
- numFeats, frameLen, ms_defaultUseHtkMethod))
- {}
-
- Wav2LetterMFCC() = delete;
- ~Wav2LetterMFCC() = default;
-
- protected:
-
- /**
- * @brief Overrides base class implementation of this function.
- * @param[in] fftVec Vector populated with FFT magnitudes.
- * @param[in] melFilterBank 2D Vector with filter bank weights.
- * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
- * to be used for each bin.
- * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
- * to be used for each bin.
- * @param[out] melEnergies Pre-allocated vector of MEL energies to be
- * populated.
- * @return true if successful, false otherwise.
- */
- bool ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies) override;
-
- /**
- * @brief Override for the base class implementation convert mel
- * energies to logarithmic scale. The difference from
- * default behaviour is that the power is converted to dB
- * and subsequently clamped.
- * @param[in,out] melEnergies 1D vector of Mel energies.
- **/
- void ConvertToLogarithmicScale(
- std::vector<float>& melEnergies) override;
-
- /**
- * @brief Create a matrix used to calculate Discrete Cosine
- * Transform. Override for the base class' default
- * implementation as the first and last elements
- * use a different normaliser.
- * @param[in] inputLength Input length of the buffer on which
- * DCT will be performed.
- * @param[in] coefficientCount Total coefficients per input length.
- * @return 1D vector with inputLength x coefficientCount elements
- * populated with DCT coefficients.
- */
- std::vector<float> CreateDCTMatrix(
- int32_t inputLength,
- int32_t coefficientCount) override;
-
- /**
- * @brief Given the low and high Mel values, get the normaliser
- * for weights to be applied when populating the filter
- * bank. Override for the base class implementation.
- * @param[in] leftMel Low Mel frequency value.
- * @param[in] rightMel High Mel frequency value.
- * @param[in] useHTKMethod Bool to signal if HTK method is to be
- * used for calculation.
- * @return Value to use for normalising.
- */
- float GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- bool useHTKMethod) override;
-
- };
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LET_MFCC_HPP */
diff --git a/source/use_case/kws_asr/include/Wav2LetterModel.hpp b/source/use_case/kws_asr/include/Wav2LetterModel.hpp
deleted file mode 100644
index 0e1adc5..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterModel.hpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LETTER_MODEL_HPP
-#define KWS_ASR_WAV2LETTER_MODEL_HPP
-
-#include "Model.hpp"
-
-namespace arm {
-namespace app {
-namespace asr {
- extern const int g_FrameLength;
- extern const int g_FrameStride;
- extern const float g_ScoreThreshold;
- extern const int g_ctxLen;
-} /* namespace asr */
-} /* namespace app */
-} /* namespace arm */
-
-namespace arm {
-namespace app {
-
- class Wav2LetterModel : public Model {
-
- public:
- /* Indices for the expected model - based on input and output tensor shapes */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_outputRowsIdx = 2;
- static constexpr uint32_t ms_outputColsIdx = 3;
-
- /* Model specific constants. */
- static constexpr uint32_t ms_blankTokenIdx = 28;
- static constexpr uint32_t ms_numMfccFeatures = 13;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 5;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LETTER_MODEL_HPP */
diff --git a/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp b/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp
deleted file mode 100644
index d1bc9a2..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterPostprocess.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LETTER_POSTPROCESS_HPP
-#define KWS_ASR_WAV2LETTER_POSTPROCESS_HPP
-
-#include "TensorFlowLiteMicro.hpp" /* TensorFlow headers. */
-#include "BaseProcessing.hpp"
-#include "AsrClassifier.hpp"
-#include "AsrResult.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Helper class to manage tensor post-processing for "wav2letter"
- * output.
- */
- class AsrPostProcess : public BasePostProcess {
- public:
- bool m_lastIteration = false; /* Flag to set if processing the last set of data for a clip. */
-
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Object used to get top N results from classification.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[in/out] result Vector of classification results to store decoded outputs.
- * @param[in] outputContextLen Left/right context length for output tensor.
- * @param[in] blankTokenIdx Index in the labels that the "Blank token" takes.
- * @param[in] reductionAxis The axis that the logits of each time step is on.
- **/
- AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
- const std::vector<std::string>& labels, asr::ResultVec& result,
- uint32_t outputContextLen,
- uint32_t blankTokenIdx, uint32_t reductionAxis);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate ASR result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
-
- /** @brief Gets the output inner length for post-processing. */
- static uint32_t GetOutputInnerLen(const TfLiteTensor*, uint32_t outputCtxLen);
-
- /** @brief Gets the output context length (left/right) for post-processing. */
- static uint32_t GetOutputContextLen(const Model& model, uint32_t inputCtxLen);
-
- /** @brief Gets the number of feature vectors to be computed. */
- static uint32_t GetNumFeatureVectors(const Model& model);
-
- private:
- AsrClassifier& m_classifier; /* ASR Classifier object. */
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- const std::vector<std::string>& m_labels; /* ASR Labels. */
- asr::ResultVec & m_results; /* Results vector for a single inference. */
- uint32_t m_outputContextLen; /* lengths of left/right contexts for output. */
- uint32_t m_outputInnerLen; /* Length of output inner context. */
- uint32_t m_totalLen; /* Total length of the required axis. */
- uint32_t m_countIterations; /* Current number of iterations. */
- uint32_t m_blankTokenIdx; /* Index of the labels blank token. */
- uint32_t m_reductionAxisIdx; /* Axis containing output logits for a single step. */
-
- /**
- * @brief Checks if the tensor and axis index are valid
- * inputs to the object - based on how it has been initialised.
- * @return true if valid, false otherwise.
- */
- bool IsInputValid(TfLiteTensor* tensor,
- uint32_t axisIdx) const;
-
- /**
- * @brief Gets the tensor data element size in bytes based
- * on the tensor type.
- * @return Size in bytes, 0 if not supported.
- */
- static uint32_t GetTensorElementSize(TfLiteTensor* tensor);
-
- /**
- * @brief Erases sections from the data assuming row-wise
- * arrangement along the context axis.
- * @return true if successful, false otherwise.
- */
- bool EraseSectionsRowWise(uint8_t* ptrData,
- uint32_t strideSzBytes,
- bool lastIteration);
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LETTER_POSTPROCESS_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp b/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
deleted file mode 100644
index 1224c23..0000000
--- a/source/use_case/kws_asr/include/Wav2LetterPreprocess.hpp
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef KWS_ASR_WAV2LETTER_PREPROCESS_HPP
-#define KWS_ASR_WAV2LETTER_PREPROCESS_HPP
-
-#include "Wav2LetterModel.hpp"
-#include "Wav2LetterMfcc.hpp"
-#include "AudioUtils.hpp"
-#include "DataStructures.hpp"
-#include "BaseProcessing.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- /* Class to facilitate pre-processing calculation for Wav2Letter model
- * for ASR. */
- using AudioWindow = audio::SlidingWindow<const int16_t>;
-
- class AsrPreProcess : public BasePreProcess {
- public:
- /**
- * @brief Constructor.
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] numMfccFeatures Number of MFCC features per window.
- * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
- * for an inference.
- * @param[in] mfccWindowLen Number of audio elements to calculate MFCC features per window.
- * @param[in] mfccWindowStride Stride (in number of elements) for moving the MFCC window.
- */
- AsrPreProcess(TfLiteTensor* inputTensor,
- uint32_t numMfccFeatures,
- uint32_t numFeatureFrames,
- uint32_t mfccWindowLen,
- uint32_t mfccWindowStride);
-
- /**
- * @brief Calculates the features required from audio data. This
- * includes MFCC, first and second order deltas,
- * normalisation and finally, quantisation. The tensor is
- * populated with features from a given window placed along
- * in a single row.
- * @param[in] audioData Pointer to the first element of audio data.
- * @param[in] audioDataLen Number of elements in the audio data.
- * @return true if successful, false in case of error.
- */
- bool DoPreProcess(const void* audioData, size_t audioDataLen) override;
-
- protected:
- /**
- * @brief Computes the first and second order deltas for the
- * MFCC buffers - they are assumed to be populated.
- *
- * @param[in] mfcc MFCC buffers.
- * @param[out] delta1 Result of the first diff computation.
- * @param[out] delta2 Result of the second diff computation.
- * @return true if successful, false otherwise.
- */
- static bool ComputeDeltas(Array2d<float>& mfcc,
- Array2d<float>& delta1,
- Array2d<float>& delta2);
-
- /**
- * @brief Given a 2D vector of floats, rescale it to have mean of 0 and
- * standard deviation of 1.
- * @param[in,out] vec Vector of vector of floats.
- */
- static void StandardizeVecF32(Array2d<float>& vec);
-
- /**
- * @brief Standardizes all the MFCC and delta buffers to have mean 0 and std. dev 1.
- */
- void Standarize();
-
- /**
- * @brief Given the quantisation and data type limits, computes
- * the quantised values of a floating point input data.
- * @param[in] elem Element to be quantised.
- * @param[in] quantScale Scale.
- * @param[in] quantOffset Offset.
- * @param[in] minVal Numerical limit - minimum.
- * @param[in] maxVal Numerical limit - maximum.
- * @return Floating point quantised value.
- */
- static float GetQuantElem(
- float elem,
- float quantScale,
- int quantOffset,
- float minVal,
- float maxVal);
-
- /**
- * @brief Quantises the MFCC and delta buffers, and places them
- * in the output buffer. While doing so, it transposes
- * the data. Reason: Buffers in this class are arranged
- * for "time" axis to be row major. Primary reason for
- * this being the convolution speed up (as we can use
- * contiguous memory). The output, however, requires the
- * time axis to be in column major arrangement.
- * @param[in] outputBuf Pointer to the output buffer.
- * @param[in] outputBufSz Output buffer's size.
- * @param[in] quantScale Quantisation scale.
- * @param[in] quantOffset Quantisation offset.
- */
- template <typename T>
- bool Quantise(
- T* outputBuf,
- const uint32_t outputBufSz,
- const float quantScale,
- const int quantOffset)
- {
- /* Check the output size will fit everything. */
- if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {
- printf_err("Tensor size too small for features\n");
- return false;
- }
-
- /* Populate. */
- T* outputBufMfcc = outputBuf;
- T* outputBufD1 = outputBuf + this->m_numMfccFeats;
- T* outputBufD2 = outputBufD1 + this->m_numMfccFeats;
- const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* (3 vectors - 1 vector) */
-
- const float minVal = std::numeric_limits<T>::min();
- const float maxVal = std::numeric_limits<T>::max();
-
- /* Need to transpose while copying and concatenating the tensor. */
- for (uint32_t j = 0; j < this->m_numFeatureFrames; ++j) {
- for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {
- *outputBufMfcc++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_mfccBuf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- *outputBufD1++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_delta1Buf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- *outputBufD2++ = static_cast<T>(AsrPreProcess::GetQuantElem(
- this->m_delta2Buf(i, j), quantScale,
- quantOffset, minVal, maxVal));
- }
- outputBufMfcc += ptrIncr;
- outputBufD1 += ptrIncr;
- outputBufD2 += ptrIncr;
- }
-
- return true;
- }
-
- private:
- audio::Wav2LetterMFCC m_mfcc; /* MFCC instance. */
- TfLiteTensor* m_inputTensor; /* Model input tensor. */
-
- /* Actual buffers to be populated. */
- Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */
- Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
- Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
-
- uint32_t m_mfccWindowLen; /* Window length for MFCC. */
- uint32_t m_mfccWindowStride; /* Window stride len for MFCC. */
- uint32_t m_numMfccFeats; /* Number of MFCC features per window. */
- uint32_t m_numFeatureFrames; /* How many sets of m_numMfccFeats. */
- AudioWindow m_mfccSlidingWindow; /* Sliding window to calculate MFCCs. */
-
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* KWS_ASR_WAV2LETTER_PREPROCESS_HPP */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/AsrClassifier.cc b/source/use_case/kws_asr/src/AsrClassifier.cc
deleted file mode 100644
index 9c18b14..0000000
--- a/source/use_case/kws_asr/src/AsrClassifier.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "AsrClassifier.hpp"
-
-#include "log_macros.h"
-#include "TensorFlowLiteMicro.hpp"
-#include "Wav2LetterModel.hpp"
-
-template<typename T>
-bool arm::app::AsrClassifier::GetTopResults(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint)
-{
- const uint32_t nElems = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx];
- const uint32_t nLetters = tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx];
-
- if (nLetters != labels.size()) {
- printf("Output size doesn't match the labels' size\n");
- return false;
- }
-
- /* NOTE: tensor's size verification against labels should be
- * checked by the calling/public function. */
- if (nLetters < 1) {
- return false;
- }
-
- /* Final results' container. */
- vecResults = std::vector<ClassificationResult>(nElems);
-
- T* tensorData = tflite::GetTensorData<T>(tensor);
-
- /* Get the top 1 results. */
- for (uint32_t i = 0, row = 0; i < nElems; ++i, row+=nLetters) {
- std::pair<T, uint32_t> top_1 = std::make_pair(tensorData[row], 0);
-
- for (uint32_t j = 1; j < nLetters; ++j) {
- if (top_1.first < tensorData[row + j]) {
- top_1.first = tensorData[row + j];
- top_1.second = j;
- }
- }
-
- double score = static_cast<int> (top_1.first);
- vecResults[i].m_normalisedVal = scale * (score - zeroPoint);
- vecResults[i].m_label = labels[top_1.second];
- vecResults[i].m_labelIdx = top_1.second;
- }
-
- return true;
-}
-template bool arm::app::AsrClassifier::GetTopResults<uint8_t>(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint);
-template bool arm::app::AsrClassifier::GetTopResults<int8_t>(TfLiteTensor* tensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, double scale, double zeroPoint);
-
-bool arm::app::AsrClassifier::GetClassificationResults(
- TfLiteTensor* outputTensor,
- std::vector<ClassificationResult>& vecResults,
- const std::vector <std::string>& labels, uint32_t topNCount, bool use_softmax)
-{
- UNUSED(use_softmax);
- vecResults.clear();
-
- constexpr int minTensorDims = static_cast<int>(
- (arm::app::Wav2LetterModel::ms_outputRowsIdx > arm::app::Wav2LetterModel::ms_outputColsIdx)?
- arm::app::Wav2LetterModel::ms_outputRowsIdx : arm::app::Wav2LetterModel::ms_outputColsIdx);
-
- constexpr uint32_t outColsIdx = arm::app::Wav2LetterModel::ms_outputColsIdx;
-
- /* Sanity checks. */
- if (outputTensor == nullptr) {
- printf_err("Output vector is null pointer.\n");
- return false;
- } else if (outputTensor->dims->size < minTensorDims) {
- printf_err("Output tensor expected to be 3D (1, m, n)\n");
- return false;
- } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
- printf_err("Output vectors are smaller than %" PRIu32 "\n", topNCount);
- return false;
- } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
- printf("Output size doesn't match the labels' size\n");
- return false;
- }
-
- if (topNCount != 1) {
- warn("TopNCount value ignored in this implementation\n");
- }
-
- /* To return the floating point values, we need quantization parameters. */
- QuantParams quantParams = GetTensorQuantParams(outputTensor);
-
- bool resultState;
-
- switch (outputTensor->type) {
- case kTfLiteUInt8:
- resultState = this->GetTopResults<uint8_t>(
- outputTensor, vecResults,
- labels, quantParams.scale,
- quantParams.offset);
- break;
- case kTfLiteInt8:
- resultState = this->GetTopResults<int8_t>(
- outputTensor, vecResults,
- labels, quantParams.scale,
- quantParams.offset);
- break;
- default:
- printf_err("Tensor type %s not supported by classifier\n",
- TfLiteTypeGetName(outputTensor->type));
- return false;
- }
-
- if (!resultState) {
- printf_err("Failed to get sorted set\n");
- return false;
- }
-
- return true;
-} \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/KwsProcessing.cc b/source/use_case/kws_asr/src/KwsProcessing.cc
deleted file mode 100644
index 328709d..0000000
--- a/source/use_case/kws_asr/src/KwsProcessing.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "KwsProcessing.hpp"
-#include "ImageUtils.hpp"
-#include "log_macros.h"
-#include "MicroNetKwsModel.hpp"
-
-namespace arm {
-namespace app {
-
- KwsPreProcess::KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numMfccFrames,
- int mfccFrameLength, int mfccFrameStride
- ):
- m_inputTensor{inputTensor},
- m_mfccFrameLength{mfccFrameLength},
- m_mfccFrameStride{mfccFrameStride},
- m_numMfccFrames{numMfccFrames},
- m_mfcc{audio::MicroNetKwsMFCC(numFeatures, mfccFrameLength)}
- {
- this->m_mfcc.Init();
-
- /* Deduce the data length required for 1 inference from the network parameters. */
- this->m_audioDataWindowSize = this->m_numMfccFrames * this->m_mfccFrameStride +
- (this->m_mfccFrameLength - this->m_mfccFrameStride);
-
- /* Creating an MFCC feature sliding window for the data required for 1 inference. */
- this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(nullptr, this->m_audioDataWindowSize,
- this->m_mfccFrameLength, this->m_mfccFrameStride);
-
- /* For longer audio clips we choose to move by half the audio window size
- * => for a 1 second window size there is an overlap of 0.5 seconds. */
- this->m_audioDataStride = this->m_audioDataWindowSize / 2;
-
- /* To have the previously calculated features re-usable, stride must be multiple
- * of MFCC features window stride. Reduce stride through audio if needed. */
- if (0 != this->m_audioDataStride % this->m_mfccFrameStride) {
- this->m_audioDataStride -= this->m_audioDataStride % this->m_mfccFrameStride;
- }
-
- this->m_numMfccVectorsInAudioStride = this->m_audioDataStride / this->m_mfccFrameStride;
-
- /* Calculate number of the feature vectors in the window overlap region.
- * These feature vectors will be reused.*/
- this->m_numReusedMfccVectors = this->m_mfccSlidingWindow.TotalStrides() + 1
- - this->m_numMfccVectorsInAudioStride;
-
- /* Construct feature calculation function. */
- this->m_mfccFeatureCalculator = GetFeatureCalculator(this->m_mfcc, this->m_inputTensor,
- this->m_numReusedMfccVectors);
-
- if (!this->m_mfccFeatureCalculator) {
- printf_err("Feature calculator not initialized.");
- }
- }
-
- bool KwsPreProcess::DoPreProcess(const void* data, size_t inputSize)
- {
- UNUSED(inputSize);
- if (data == nullptr) {
- printf_err("Data pointer is null");
- }
-
- /* Set the features sliding window to the new address. */
- auto input = static_cast<const int16_t*>(data);
- this->m_mfccSlidingWindow.Reset(input);
-
- /* Cache is only usable if we have more than 1 inference in an audio clip. */
- bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedMfccVectors > 0;
-
- /* Use a sliding window to calculate MFCC features frame by frame. */
- while (this->m_mfccSlidingWindow.HasNext()) {
- const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
-
- std::vector<int16_t> mfccFrameAudioData = std::vector<int16_t>(mfccWindow,
- mfccWindow + this->m_mfccFrameLength);
-
- /* Compute features for this window and write them to input tensor. */
- this->m_mfccFeatureCalculator(mfccFrameAudioData, this->m_mfccSlidingWindow.Index(),
- useCache, this->m_numMfccVectorsInAudioStride);
- }
-
- debug("Input tensor populated \n");
-
- return true;
- }
-
- /**
- * @brief Generic feature calculator factory.
- *
- * Returns lambda function to compute features using features cache.
- * Real features math is done by a lambda function provided as a parameter.
- * Features are written to input tensor memory.
- *
- * @tparam T Feature vector type.
- * @param[in] inputTensor Model input tensor pointer.
- * @param[in] cacheSize Number of feature vectors to cache. Defined by the sliding window overlap.
- * @param[in] compute Features calculator function.
- * @return Lambda function to compute features.
- */
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
- KwsPreProcess::FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute)
- {
- /* Feature cache to be captured by lambda function. */
- static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
-
- return [=](std::vector<int16_t>& audioDataWindow,
- size_t index,
- bool useCache,
- size_t featuresOverlapIndex)
- {
- T* tensorData = tflite::GetTensorData<T>(inputTensor);
- std::vector<T> features;
-
- /* Reuse features from cache if cache is ready and sliding windows overlap.
- * Overlap is in the beginning of sliding window with a size of a feature cache. */
- if (useCache && index < featureCache.size()) {
- features = std::move(featureCache[index]);
- } else {
- features = std::move(compute(audioDataWindow));
- }
- auto size = features.size();
- auto sizeBytes = sizeof(T) * size;
- std::memcpy(tensorData + (index * size), features.data(), sizeBytes);
-
- /* Start renewing cache as soon iteration goes out of the windows overlap. */
- if (index >= featuresOverlapIndex) {
- featureCache[index - featuresOverlapIndex] = std::move(features);
- }
- };
- }
-
- template std::function<void (std::vector<int16_t>&, size_t , bool, size_t)>
- KwsPreProcess::FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
-
- template std::function<void(std::vector<int16_t>&, size_t, bool, size_t)>
- KwsPreProcess::FeatureCalc<float>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<float>(std::vector<int16_t>&)> compute);
-
-
- std::function<void (std::vector<int16_t>&, int, bool, size_t)>
- KwsPreProcess::GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
- {
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
-
- TfLiteQuantization quant = inputTensor->quantization;
-
- if (kTfLiteAffineQuantization == quant.type) {
- auto *quantParams = (TfLiteAffineQuantization *) quant.params;
- const float quantScale = quantParams->scale->data[0];
- const int quantOffset = quantParams->zero_point->data[0];
-
- switch (inputTensor->type) {
- case kTfLiteInt8: {
- mfccFeatureCalc = this->FeatureCalc<int8_t>(inputTensor,
- cacheSize,
- [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
- return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
- quantScale,
- quantOffset);
- }
- );
- break;
- }
- default:
- printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
- }
- } else {
- mfccFeatureCalc = this->FeatureCalc<float>(inputTensor, cacheSize,
- [&mfcc](std::vector<int16_t>& audioDataWindow) {
- return mfcc.MfccCompute(audioDataWindow); }
- );
- }
- return mfccFeatureCalc;
- }
-
- KwsPostProcess::KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results)
- :m_outputTensor{outputTensor},
- m_kwsClassifier{classifier},
- m_labels{labels},
- m_results{results}
- {}
-
- bool KwsPostProcess::DoPostProcess()
- {
- return this->m_kwsClassifier.GetClassificationResults(
- this->m_outputTensor, this->m_results,
- this->m_labels, 1, true);
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index f1d97a0..2365264 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -23,7 +23,24 @@
#include "Wav2LetterModel.hpp" /* ASR model class for running inference. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+
+ namespace asr {
+ extern uint8_t* GetModelPointer();
+ extern size_t GetModelLen();
+ }
+
+ namespace kws {
+ extern uint8_t* GetModelPointer();
+ extern size_t GetModelLen();
+ }
+} /* namespace app */
+} /* namespace arm */
using KwsClassifier = arm::app::Classifier;
@@ -60,14 +77,29 @@ void main_loop()
arm::app::Wav2LetterModel asrModel;
/* Load the models. */
- if (!kwsModel.Init()) {
+ if (!kwsModel.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ arm::app::kws::GetModelPointer(),
+ arm::app::kws::GetModelLen())) {
printf_err("Failed to initialise KWS model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (kwsModel.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the KWS model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Initialise the asr model using the same allocator from KWS
* to re-use the tensor arena. */
- if (!asrModel.Init(kwsModel.GetAllocator())) {
+ if (!asrModel.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ arm::app::asr::GetModelPointer(),
+ arm::app::asr::GetModelLen(),
+ kwsModel.GetAllocator())) {
printf_err("Failed to initialise ASR model\n");
return;
} else if (!VerifyTensorDimensions(asrModel)) {
@@ -75,6 +107,14 @@ void main_loop()
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (asrModel.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the ASR model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
diff --git a/source/use_case/kws_asr/src/MicroNetKwsModel.cc b/source/use_case/kws_asr/src/MicroNetKwsModel.cc
deleted file mode 100644
index 663faa0..0000000
--- a/source/use_case/kws_asr/src/MicroNetKwsModel.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "MicroNetKwsModel.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-namespace kws {
- extern uint8_t* GetModelPointer();
- extern size_t GetModelLen();
-} /* namespace kws */
-} /* namespace app */
-} /* namespace arm */
-
-const tflite::MicroOpResolver& arm::app::MicroNetKwsModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::MicroNetKwsModel::EnlistOperations()
-{
- this->m_opResolver.AddAveragePool2D();
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddDepthwiseConv2D();
- this->m_opResolver.AddFullyConnected();
- this->m_opResolver.AddRelu();
- this->m_opResolver.AddReshape();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-const uint8_t* arm::app::MicroNetKwsModel::ModelPointer()
-{
- return arm::app::kws::GetModelPointer();
-}
-
-size_t arm::app::MicroNetKwsModel::ModelSize()
-{
- return arm::app::kws::GetModelLen();
-} \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/OutputDecode.cc b/source/use_case/kws_asr/src/OutputDecode.cc
deleted file mode 100644
index 41fbe07..0000000
--- a/source/use_case/kws_asr/src/OutputDecode.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "OutputDecode.hpp"
-
-namespace arm {
-namespace app {
-namespace audio {
-namespace asr {
-
- std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults)
- {
- std::string CleanOutputBuffer;
-
- for (size_t i = 0; i < vecResults.size(); ++i) /* For all elements in vector. */
- {
- while (i+1 < vecResults.size() &&
- vecResults[i].m_label == vecResults[i+1].m_label) /* While the current element is equal to the next, ignore it and move on. */
- {
- ++i;
- }
- if (vecResults[i].m_label != "$") /* $ is a character used to represent unknown and double characters so should not be in output. */
- {
- CleanOutputBuffer += vecResults[i].m_label; /* If the element is different to the next, it will be appended to CleanOutputBuffer. */
- }
- }
-
- return CleanOutputBuffer; /* Return string type containing clean output. */
- }
-
-} /* namespace asr */
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/UseCaseHandler.cc b/source/use_case/kws_asr/src/UseCaseHandler.cc
index 01aefae..9427ae0 100644
--- a/source/use_case/kws_asr/src/UseCaseHandler.cc
+++ b/source/use_case/kws_asr/src/UseCaseHandler.cc
@@ -25,6 +25,7 @@
#include "MicroNetKwsMfcc.hpp"
#include "Classifier.hpp"
#include "KwsResult.hpp"
+#include "Wav2LetterModel.hpp"
#include "Wav2LetterMfcc.hpp"
#include "Wav2LetterPreprocess.hpp"
#include "Wav2LetterPostprocess.hpp"
@@ -470,4 +471,4 @@ namespace app {
}
} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
+} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/Wav2LetterMfcc.cc b/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
deleted file mode 100644
index f2c50f3..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterMfcc.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterMfcc.hpp"
-
-#include "PlatformMath.hpp"
-#include "log_macros.h"
-
-#include <cfloat>
-
-namespace arm {
-namespace app {
-namespace audio {
-
- bool Wav2LetterMFCC::ApplyMelFilterBank(
- std::vector<float>& fftVec,
- std::vector<std::vector<float>>& melFilterBank,
- std::vector<uint32_t>& filterBankFilterFirst,
- std::vector<uint32_t>& filterBankFilterLast,
- std::vector<float>& melEnergies)
- {
- const size_t numBanks = melEnergies.size();
-
- if (numBanks != filterBankFilterFirst.size() ||
- numBanks != filterBankFilterLast.size()) {
- printf_err("unexpected filter bank lengths\n");
- return false;
- }
-
- for (size_t bin = 0; bin < numBanks; ++bin) {
- auto filterBankIter = melFilterBank[bin].begin();
- auto end = melFilterBank[bin].end();
- /* Avoid log of zero at later stages, same value used in librosa.
- * The number was used during our default wav2letter model training. */
- float melEnergy = 1e-10;
- const uint32_t firstIndex = filterBankFilterFirst[bin];
- const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
-
- for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
- melEnergy += (*filterBankIter++ * fftVec[i]);
- }
-
- melEnergies[bin] = melEnergy;
- }
-
- return true;
- }
-
- void Wav2LetterMFCC::ConvertToLogarithmicScale(
- std::vector<float>& melEnergies)
- {
- float maxMelEnergy = -FLT_MAX;
-
- /* Container for natural logarithms of mel energies. */
- std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
-
- /* Because we are taking natural logs, we need to multiply by log10(e).
- * Also, for wav2letter model, we scale our log10 values by 10. */
- constexpr float multiplier = 10.0 * /* Default scalar. */
- 0.4342944819032518; /* log10f(std::exp(1.0))*/
-
- /* Take log of the whole vector. */
- math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
-
- /* Scale the log values and get the max. */
- for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
- iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
-
- *iterM = *iterL * multiplier;
-
- /* Save the max mel energy. */
- if (*iterM > maxMelEnergy) {
- maxMelEnergy = *iterM;
- }
- }
-
- /* Clamp the mel energies. */
- constexpr float maxDb = 80.0;
- const float clampLevelLowdB = maxMelEnergy - maxDb;
- for (float & melEnergie : melEnergies) {
- melEnergie = std::max(melEnergie, clampLevelLowdB);
- }
- }
-
- std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
- const int32_t inputLength,
- const int32_t coefficientCount)
- {
- std::vector<float> dctMatix(inputLength * coefficientCount);
-
- /* Orthonormal normalization. */
- const float normalizerK0 = 2 * math::MathUtils::SqrtF32(1.0f /
- static_cast<float>(4*inputLength));
- const float normalizer = 2 * math::MathUtils::SqrtF32(1.0f /
- static_cast<float>(2*inputLength));
-
- const float angleIncr = M_PI/inputLength;
- float angle = angleIncr; /* We start using it at k = 1 loop. */
-
- /* First row of DCT will use normalizer K0 */
- for (int32_t n = 0; n < inputLength; ++n) {
- dctMatix[n] = normalizerK0 /* cos(0) = 1 */;
- }
-
- /* Second row (index = 1) onwards, we use standard normalizer. */
- for (int32_t k = 1, m = inputLength; k < coefficientCount; ++k, m += inputLength) {
- for (int32_t n = 0; n < inputLength; ++n) {
- dctMatix[m+n] = normalizer *
- math::MathUtils::CosineF32((n + 0.5f) * angle);
- }
- angle += angleIncr;
- }
- return dctMatix;
- }
-
- float Wav2LetterMFCC::GetMelFilterBankNormaliser(
- const float& leftMel,
- const float& rightMel,
- const bool useHTKMethod)
- {
- /* Slaney normalization for mel weights. */
- return (2.0f / (MFCC::InverseMelScale(rightMel, useHTKMethod) -
- MFCC::InverseMelScale(leftMel, useHTKMethod)));
- }
-
-} /* namespace audio */
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/kws_asr/src/Wav2LetterModel.cc b/source/use_case/kws_asr/src/Wav2LetterModel.cc
deleted file mode 100644
index 52bd23a..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterModel.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterModel.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-namespace asr {
- extern uint8_t* GetModelPointer();
- extern size_t GetModelLen();
-}
-} /* namespace app */
-} /* namespace arm */
-
-const tflite::MicroOpResolver& arm::app::Wav2LetterModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::Wav2LetterModel::EnlistOperations()
-{
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddLeakyRelu();
- this->m_opResolver.AddSoftmax();
- this->m_opResolver.AddReshape();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-const uint8_t* arm::app::Wav2LetterModel::ModelPointer()
-{
- return arm::app::asr::GetModelPointer();
-}
-
-size_t arm::app::Wav2LetterModel::ModelSize()
-{
- return arm::app::asr::GetModelLen();
-} \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
deleted file mode 100644
index 42f434e..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterPostprocess.hpp"
-
-#include "Wav2LetterModel.hpp"
-#include "log_macros.h"
-
-#include <cmath>
-
-namespace arm {
-namespace app {
-
- AsrPostProcess::AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
- const std::vector<std::string>& labels, std::vector<ClassificationResult>& results,
- const uint32_t outputContextLen,
- const uint32_t blankTokenIdx, const uint32_t reductionAxisIdx
- ):
- m_classifier(classifier),
- m_outputTensor(outputTensor),
- m_labels{labels},
- m_results(results),
- m_outputContextLen(outputContextLen),
- m_countIterations(0),
- m_blankTokenIdx(blankTokenIdx),
- m_reductionAxisIdx(reductionAxisIdx)
- {
- this->m_outputInnerLen = AsrPostProcess::GetOutputInnerLen(this->m_outputTensor, this->m_outputContextLen);
- this->m_totalLen = (2 * this->m_outputContextLen + this->m_outputInnerLen);
- }
-
- bool AsrPostProcess::DoPostProcess()
- {
- /* Basic checks. */
- if (!this->IsInputValid(this->m_outputTensor, this->m_reductionAxisIdx)) {
- return false;
- }
-
- /* Irrespective of tensor type, we use unsigned "byte" */
- auto* ptrData = tflite::GetTensorData<uint8_t>(this->m_outputTensor);
- const uint32_t elemSz = AsrPostProcess::GetTensorElementSize(this->m_outputTensor);
-
- /* Other sanity checks. */
- if (0 == elemSz) {
- printf_err("Tensor type not supported for post processing\n");
- return false;
- } else if (elemSz * this->m_totalLen > this->m_outputTensor->bytes) {
- printf_err("Insufficient number of tensor bytes\n");
- return false;
- }
-
- /* Which axis do we need to process? */
- switch (this->m_reductionAxisIdx) {
- case Wav2LetterModel::ms_outputRowsIdx:
- this->EraseSectionsRowWise(
- ptrData, elemSz * this->m_outputTensor->dims->data[Wav2LetterModel::ms_outputColsIdx],
- this->m_lastIteration);
- break;
- default:
- printf_err("Unsupported axis index: %" PRIu32 "\n", this->m_reductionAxisIdx);
- return false;
- }
- this->m_classifier.GetClassificationResults(this->m_outputTensor,
- this->m_results, this->m_labels, 1);
-
- return true;
- }
-
- bool AsrPostProcess::IsInputValid(TfLiteTensor* tensor, const uint32_t axisIdx) const
- {
- if (nullptr == tensor) {
- return false;
- }
-
- if (static_cast<int>(axisIdx) >= tensor->dims->size) {
- printf_err("Invalid axis index: %" PRIu32 "; Max: %d\n",
- axisIdx, tensor->dims->size);
- return false;
- }
-
- if (static_cast<int>(this->m_totalLen) !=
- tensor->dims->data[axisIdx]) {
- printf_err("Unexpected tensor dimension for axis %d, got %d, \n",
- axisIdx, tensor->dims->data[axisIdx]);
- return false;
- }
-
- return true;
- }
-
- uint32_t AsrPostProcess::GetTensorElementSize(TfLiteTensor* tensor)
- {
- switch(tensor->type) {
- case kTfLiteUInt8:
- case kTfLiteInt8:
- return 1;
- case kTfLiteInt16:
- return 2;
- case kTfLiteInt32:
- case kTfLiteFloat32:
- return 4;
- default:
- printf_err("Unsupported tensor type %s\n",
- TfLiteTypeGetName(tensor->type));
- }
-
- return 0;
- }
-
- bool AsrPostProcess::EraseSectionsRowWise(
- uint8_t* ptrData,
- const uint32_t strideSzBytes,
- const bool lastIteration)
- {
- /* In this case, the "zero-ing" is quite simple as the region
- * to be zeroed sits in contiguous memory (row-major). */
- const uint32_t eraseLen = strideSzBytes * this->m_outputContextLen;
-
- /* Erase left context? */
- if (this->m_countIterations > 0) {
- /* Set output of each classification window to the blank token. */
- std::memset(ptrData, 0, eraseLen);
- for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
- ptrData[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
- }
- }
-
- /* Erase right context? */
- if (false == lastIteration) {
- uint8_t* rightCtxPtr = ptrData + (strideSzBytes * (this->m_outputContextLen + this->m_outputInnerLen));
- /* Set output of each classification window to the blank token. */
- std::memset(rightCtxPtr, 0, eraseLen);
- for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
- rightCtxPtr[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
- }
- }
-
- if (lastIteration) {
- this->m_countIterations = 0;
- } else {
- ++this->m_countIterations;
- }
-
- return true;
- }
-
- uint32_t AsrPostProcess::GetNumFeatureVectors(const Model& model)
- {
- TfLiteTensor* inputTensor = model.GetInputTensor(0);
- const int inputRows = std::max(inputTensor->dims->data[Wav2LetterModel::ms_inputRowsIdx], 0);
- if (inputRows == 0) {
- printf_err("Error getting number of input rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_inputRowsIdx);
- }
- return inputRows;
- }
-
- uint32_t AsrPostProcess::GetOutputInnerLen(const TfLiteTensor* outputTensor, const uint32_t outputCtxLen)
- {
- const uint32_t outputRows = std::max(outputTensor->dims->data[Wav2LetterModel::ms_outputRowsIdx], 0);
- if (outputRows == 0) {
- printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_outputRowsIdx);
- }
-
- /* Watching for underflow. */
- int innerLen = (outputRows - (2 * outputCtxLen));
-
- return std::max(innerLen, 0);
- }
-
- uint32_t AsrPostProcess::GetOutputContextLen(const Model& model, const uint32_t inputCtxLen)
- {
- const uint32_t inputRows = AsrPostProcess::GetNumFeatureVectors(model);
- const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
- constexpr uint32_t ms_outputRowsIdx = Wav2LetterModel::ms_outputRowsIdx;
-
- /* Check to make sure that the input tensor supports the above
- * context and inner lengths. */
- if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
- printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
- inputCtxLen);
- return 0;
- }
-
- TfLiteTensor* outputTensor = model.GetOutputTensor(0);
- const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
- if (outputRows == 0) {
- printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
- Wav2LetterModel::ms_outputRowsIdx);
- return 0;
- }
-
- const float inOutRowRatio = static_cast<float>(inputRows) /
- static_cast<float>(outputRows);
-
- return std::round(static_cast<float>(inputCtxLen) / inOutRowRatio);
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
deleted file mode 100644
index 92b0631..0000000
--- a/source/use_case/kws_asr/src/Wav2LetterPreprocess.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "Wav2LetterPreprocess.hpp"
-
-#include "PlatformMath.hpp"
-#include "TensorFlowLiteMicro.hpp"
-
-#include <algorithm>
-#include <cmath>
-
-namespace arm {
-namespace app {
-
- AsrPreProcess::AsrPreProcess(TfLiteTensor* inputTensor, const uint32_t numMfccFeatures,
- const uint32_t numFeatureFrames, const uint32_t mfccWindowLen,
- const uint32_t mfccWindowStride
- ):
- m_mfcc(numMfccFeatures, mfccWindowLen),
- m_inputTensor(inputTensor),
- m_mfccBuf(numMfccFeatures, numFeatureFrames),
- m_delta1Buf(numMfccFeatures, numFeatureFrames),
- m_delta2Buf(numMfccFeatures, numFeatureFrames),
- m_mfccWindowLen(mfccWindowLen),
- m_mfccWindowStride(mfccWindowStride),
- m_numMfccFeats(numMfccFeatures),
- m_numFeatureFrames(numFeatureFrames)
- {
- if (numMfccFeatures > 0 && mfccWindowLen > 0) {
- this->m_mfcc.Init();
- }
- }
-
- bool AsrPreProcess::DoPreProcess(const void* audioData, const size_t audioDataLen)
- {
- this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(
- static_cast<const int16_t*>(audioData), audioDataLen,
- this->m_mfccWindowLen, this->m_mfccWindowStride);
-
- uint32_t mfccBufIdx = 0;
-
- std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
- std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
- std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
-
- /* While we can slide over the audio. */
- while (this->m_mfccSlidingWindow.HasNext()) {
- const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
- auto mfccAudioData = std::vector<int16_t>(
- mfccWindow,
- mfccWindow + this->m_mfccWindowLen);
- auto mfcc = this->m_mfcc.MfccCompute(mfccAudioData);
- for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i) {
- this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
- }
- ++mfccBufIdx;
- }
-
- /* Pad MFCC if needed by adding MFCC for zeros. */
- if (mfccBufIdx != this->m_numFeatureFrames) {
- std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->m_mfccWindowLen, 0);
- std::vector<float> mfccZeros = this->m_mfcc.MfccCompute(zerosWindow);
-
- while (mfccBufIdx != this->m_numFeatureFrames) {
- memcpy(&this->m_mfccBuf(0, mfccBufIdx),
- mfccZeros.data(), sizeof(float) * m_numMfccFeats);
- ++mfccBufIdx;
- }
- }
-
- /* Compute first and second order deltas from MFCCs. */
- AsrPreProcess::ComputeDeltas(this->m_mfccBuf, this->m_delta1Buf, this->m_delta2Buf);
-
- /* Standardize calculated features. */
- this->Standarize();
-
- /* Quantise. */
- QuantParams quantParams = GetTensorQuantParams(this->m_inputTensor);
-
- if (0 == quantParams.scale) {
- printf_err("Quantisation scale can't be 0\n");
- return false;
- }
-
- switch(this->m_inputTensor->type) {
- case kTfLiteUInt8:
- return this->Quantise<uint8_t>(
- tflite::GetTensorData<uint8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
- quantParams.scale, quantParams.offset);
- case kTfLiteInt8:
- return this->Quantise<int8_t>(
- tflite::GetTensorData<int8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
- quantParams.scale, quantParams.offset);
- default:
- printf_err("Unsupported tensor type %s\n",
- TfLiteTypeGetName(this->m_inputTensor->type));
- }
-
- return false;
- }
-
- bool AsrPreProcess::ComputeDeltas(Array2d<float>& mfcc,
- Array2d<float>& delta1,
- Array2d<float>& delta2)
- {
- const std::vector <float> delta1Coeffs =
- {6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
- 1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
- -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
-
- const std::vector <float> delta2Coeffs =
- {0.06060606, 0.01515152, -0.01731602,
- -0.03679654, -0.04329004, -0.03679654,
- -0.01731602, 0.01515152, 0.06060606};
-
- if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
- mfcc.size(0) == 0 || mfcc.size(1) == 0) {
- return false;
- }
-
- /* Get the middle index; coeff vec len should always be odd. */
- const size_t coeffLen = delta1Coeffs.size();
- const size_t fMidIdx = (coeffLen - 1)/2;
- const size_t numFeatures = mfcc.size(0);
- const size_t numFeatVectors = mfcc.size(1);
-
- /* Iterate through features in MFCC vector. */
- for (size_t i = 0; i < numFeatures; ++i) {
- /* For each feature, iterate through time (t) samples representing feature evolution and
- * calculate d/dt and d^2/dt^2, using 1D convolution with differential kernels.
- * Convolution padding = valid, result size is `time length - kernel length + 1`.
- * The result is padded with 0 from both sides to match the size of initial time samples data.
- *
- * For the small filter, conv1D implementation as a simple loop is efficient enough.
- * Filters of a greater size would need CMSIS-DSP functions to be used, like arm_fir_f32.
- */
-
- for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) {
- float d1 = 0;
- float d2 = 0;
- const size_t mfccStIdx = j - fMidIdx;
-
- for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
-
- d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
- d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
- }
-
- delta1(i,j) = d1;
- delta2(i,j) = d2;
- }
- }
-
- return true;
- }
-
- void AsrPreProcess::StandardizeVecF32(Array2d<float>& vec)
- {
- auto mean = math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
- auto stddev = math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
-
- debug("Mean: %f, Stddev: %f\n", mean, stddev);
- if (stddev == 0) {
- std::fill(vec.begin(), vec.end(), 0);
- } else {
- const float stddevInv = 1.f/stddev;
- const float normalisedMean = mean/stddev;
-
- auto NormalisingFunction = [=](float& value) {
- value = value * stddevInv - normalisedMean;
- };
- std::for_each(vec.begin(), vec.end(), NormalisingFunction);
- }
- }
-
- void AsrPreProcess::Standarize()
- {
- AsrPreProcess::StandardizeVecF32(this->m_mfccBuf);
- AsrPreProcess::StandardizeVecF32(this->m_delta1Buf);
- AsrPreProcess::StandardizeVecF32(this->m_delta2Buf);
- }
-
- float AsrPreProcess::GetQuantElem(
- const float elem,
- const float quantScale,
- const int quantOffset,
- const float minVal,
- const float maxVal)
- {
- float val = std::round((elem/quantScale) + quantOffset);
- return std::min<float>(std::max<float>(val, minVal), maxVal);
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake
index 40df4d7..59ef450 100644
--- a/source/use_case/kws_asr/usecase.cmake
+++ b/source/use_case/kws_asr/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the APIs to use for this use case
+list(APPEND ${use_case}_API_LIST "kws" "asr")
USER_OPTION(${use_case}_FILE_PATH "Directory with WAV files, or path to a single WAV file, to use in the evaluation application."
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
@@ -145,4 +147,4 @@ generate_audio_code(${${use_case}_FILE_PATH} ${SRC_GEN_DIR} ${INC_GEN_DIR}
${${use_case}_AUDIO_OFFSET}
${${use_case}_AUDIO_DURATION}
${${use_case}_AUDIO_RES_TYPE}
- ${${use_case}_AUDIO_MIN_SAMPLES}) \ No newline at end of file
+ ${${use_case}_AUDIO_MIN_SAMPLES})
diff --git a/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp b/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
deleted file mode 100644
index cbf0e4e..0000000
--- a/source/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef RNNOISE_FEATURE_PROCESSOR_HPP
-#define RNNOISE_FEATURE_PROCESSOR_HPP
-
-#include "PlatformMath.hpp"
-#include <cstdint>
-#include <vector>
-#include <array>
-#include <tuple>
-
-namespace arm {
-namespace app {
-namespace rnn {
-
- using vec1D32F = std::vector<float>;
- using vec2D32F = std::vector<vec1D32F>;
- using arrHp = std::array<float, 2>;
- using math::FftInstance;
- using math::FftType;
-
- class FrameFeatures {
- public:
- bool m_silence{false}; /* If frame contains silence or not. */
- vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */
- vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers. */
- vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers. */
- vec1D32F m_Ex{}; /* Spectral band energy for audio x. */
- vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */
- vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. */
- };
-
- /**
- * @brief RNNoise pre and post processing class based on the 2018 paper from
- * Jan-Marc Valin. Recommended reading:
- * - https://jmvalin.ca/demo/rnnoise/
- * - https://arxiv.org/abs/1709.08243
- **/
- class RNNoiseFeatureProcessor {
- /* Public interface */
- public:
- RNNoiseFeatureProcessor();
- ~RNNoiseFeatureProcessor() = default;
-
- /**
- * @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model.
- * @param[in] audioData Pointer to the floating point vector
- * with audio data (within the numerical
- * limits of int16_t type).
- * @param[in] audioLen Number of elements in the audio window.
- * @param[out] features FrameFeatures object reference.
- **/
- void PreprocessFrame(const float* audioData,
- size_t audioLen,
- FrameFeatures& features);
-
- /**
- * @brief Use the RNNoise model output gain values with pre-processing features
- * to generate audio with noise suppressed.
- * @param[in] modelOutput Output gain values from model.
- * @param[in] features Calculated features from pre-processing step.
- * @param[out] outFrame Output frame to be populated.
- **/
- void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame);
-
-
- /* Public constants */
- public:
- static constexpr uint32_t FRAME_SIZE_SHIFT{2};
- static constexpr uint32_t FRAME_SIZE{512};
- static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE};
- static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1};
-
- static constexpr uint32_t PITCH_MIN_PERIOD{64};
- static constexpr uint32_t PITCH_MAX_PERIOD{820};
- static constexpr uint32_t PITCH_FRAME_SIZE{1024};
- static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE};
-
- static constexpr uint32_t NB_BANDS{22};
- static constexpr uint32_t CEPS_MEM{8};
- static constexpr uint32_t NB_DELTA_CEPS{6};
-
- static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2};
-
- /* Private functions */
- private:
-
- /**
- * @brief Initialises the half window and DCT tables.
- */
- void InitTables();
-
- /**
- * @brief Applies a bi-quadratic filter over the audio window.
- * @param[in] bHp Constant coefficient set b (arrHp type).
- * @param[in] aHp Constant coefficient set a (arrHp type).
- * @param[in,out] memHpX Coefficients populated by this function.
- * @param[in,out] audioWindow Floating point vector with audio data.
- **/
- void BiQuad(
- const arrHp& bHp,
- const arrHp& aHp,
- arrHp& memHpX,
- vec1D32F& audioWindow);
-
- /**
- * @brief Computes features from the "filtered" audio window.
- * @param[in] audioWindow Floating point vector with audio data.
- * @param[out] features FrameFeatures object reference.
- **/
- void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
-
- /**
- * @brief Runs analysis on the audio buffer.
- * @param[in] audioWindow Floating point vector with audio data.
- * @param[out] fft Floating point FFT vector containing real and
- * imaginary pairs of elements. NOTE: this vector
- * does not contain the mirror image (conjugates)
- * part of the spectrum.
- * @param[out] energy Computed energy for each band in the Bark scale.
- * @param[out] analysisMem Buffer sequentially, but partially,
- * populated with new audio data.
- **/
- void FrameAnalysis(
- const vec1D32F& audioWindow,
- vec1D32F& fft,
- vec1D32F& energy,
- vec1D32F& analysisMem);
-
- /**
- * @brief Applies the window function, in-place, over the given
- * floating point buffer.
- * @param[in,out] x Buffer the window will be applied to.
- **/
- void ApplyWindow(vec1D32F& x);
-
- /**
- * @brief Computes the FFT for a given vector.
- * @param[in] x Vector to compute the FFT from.
- * @param[out] fft Floating point FFT vector containing real and
- * imaginary pairs of elements. NOTE: this vector
- * does not contain the mirror image (conjugates)
- * part of the spectrum.
- **/
- void ForwardTransform(
- vec1D32F& x,
- vec1D32F& fft);
-
- /**
- * @brief Computes band energy for each of the 22 Bark scale bands.
- * @param[in] fft_X FFT spectrum (as computed by ForwardTransform).
- * @param[out] bandE Vector with 22 elements populated with energy for
- * each band.
- **/
- void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
-
- /**
- * @brief Computes band energy correlation.
- * @param[in] X FFT vector X.
- * @param[in] P FFT vector P.
- * @param[out] bandC Vector with 22 elements populated with band energy
- * correlation for the two input FFT vectors.
- **/
- void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
-
- /**
- * @brief Performs pitch auto-correlation for a given vector for
- * given lag.
- * @param[in] x Input vector.
- * @param[out] ac Auto-correlation output vector.
- * @param[in] lag Lag value.
- * @param[in] n Number of elements to consider for correlation
- * computation.
- **/
- void AutoCorr(const vec1D32F &x,
- vec1D32F &ac,
- size_t lag,
- size_t n);
-
- /**
- * @brief Computes pitch cross-correlation.
- * @param[in] x Input vector 1.
- * @param[in] y Input vector 2.
- * @param[out] xCorr Cross-correlation output vector.
- * @param[in] len Number of elements to consider for correlation.
- * computation.
- * @param[in] maxPitch Maximum pitch.
- **/
- void PitchXCorr(
- const vec1D32F& x,
- const vec1D32F& y,
- vec1D32F& xCorr,
- size_t len,
- size_t maxPitch);
-
- /**
- * @brief Computes "Linear Predictor Coefficients".
- * @param[in] ac Correlation vector.
- * @param[in] p Number of elements of input vector to consider.
- * @param[out] lpc Output coefficients vector.
- **/
- void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
-
- /**
- * @brief Custom FIR implementation.
- * @param[in] num FIR coefficient vector.
- * @param[in] N Number of elements.
- * @param[out] x Vector to be be processed.
- **/
- void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
-
- /**
- * @brief Down-sample the pitch buffer.
- * @param[in,out] pitchBuf Pitch buffer.
- * @param[in] pitchBufSz Buffer size.
- **/
- void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
-
- /**
- * @brief Pitch search function.
- * @param[in] xLP Shifted pitch buffer input.
- * @param[in] y Pitch buffer input.
- * @param[in] len Length to search for.
- * @param[in] maxPitch Maximum pitch.
- * @return pitch index.
- **/
- int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
-
- /**
- * @brief Finds the "best" pitch from the buffer.
- * @param[in] xCorr Pitch correlation vector.
- * @param[in] y Pitch buffer input.
- * @param[in] len Length to search for.
- * @param[in] maxPitch Maximum pitch.
- * @return pitch array (2 elements).
- **/
- arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
-
- /**
- * @brief Remove pitch period doubling errors.
- * @param[in,out] pitchBuf Pitch buffer vector.
- * @param[in] maxPeriod Maximum period.
- * @param[in] minPeriod Minimum period.
- * @param[in] frameSize Frame size.
- * @param[in] pitchIdx0_ Pitch index 0.
- * @return pitch index.
- **/
- int RemoveDoubling(
- vec1D32F& pitchBuf,
- uint32_t maxPeriod,
- uint32_t minPeriod,
- uint32_t frameSize,
- size_t pitchIdx0_);
-
- /**
- * @brief Computes pitch gain.
- * @param[in] xy Single xy cross correlation value.
- * @param[in] xx Single xx auto correlation value.
- * @param[in] yy Single yy auto correlation value.
- * @return Calculated pitch gain.
- **/
- float ComputePitchGain(float xy, float xx, float yy);
-
- /**
- * @brief Computes DCT vector from the given input.
- * @param[in] input Input vector.
- * @param[out] output Output vector with DCT coefficients.
- **/
- void DCT(vec1D32F& input, vec1D32F& output);
-
- /**
- * @brief Perform inverse fourier transform on complex spectral vector.
- * @param[out] out Output vector.
- * @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved.
- **/
- void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
-
- /**
- * @brief Perform pitch filtering.
- * @param[in] features Object with pre-processing calculated frame features.
- * @param[in] g Gain values.
- **/
- void PitchFilter(FrameFeatures& features, vec1D32F& g);
-
- /**
- * @brief Interpolate the band gain values.
- * @param[out] g Gain values.
- * @param[in] bandE Vector with 22 elements populated with energy for
- * each band.
- **/
- void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
-
- /**
- * @brief Create de-noised frame.
- * @param[out] outFrame Output vector for storing the created audio frame.
- * @param[in] fftY Gain adjusted complex spectral vector.
- */
- void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
-
- /* Private objects */
- private:
- FftInstance m_fftInstReal; /* FFT instance for real numbers */
- FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
- vec1D32F m_halfWindow; /* Window coefficients */
- vec1D32F m_dctTable; /* DCT table */
- vec1D32F m_analysisMem; /* Buffer used for frame analysis */
- vec2D32F m_cepstralMem; /* Cepstral coefficients */
- size_t m_memId; /* memory ID */
- vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */
- vec1D32F m_pitchBuf; /* Pitch buffer */
- float m_lastGain; /* Last gain calculated */
- int m_lastPeriod; /* Last period calculated */
- arrHp m_memHpX; /* HpX coefficients. */
- vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */
-
- /* Constants */
- const std::array <uint32_t, NB_BANDS> m_eband5ms {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
- 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
- };
-
-
-} /* namespace rnn */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* RNNOISE_FEATURE_PROCESSOR_HPP */
diff --git a/source/use_case/noise_reduction/include/RNNoiseModel.hpp b/source/use_case/noise_reduction/include/RNNoiseModel.hpp
deleted file mode 100644
index f6e4510..0000000
--- a/source/use_case/noise_reduction/include/RNNoiseModel.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef RNNOISE_MODEL_HPP
-#define RNNOISE_MODEL_HPP
-
-#include "Model.hpp"
-
-extern const uint32_t g_NumInputFeatures;
-extern const uint32_t g_FrameLength;
-extern const uint32_t g_FrameStride;
-
-namespace arm {
-namespace app {
-
- class RNNoiseModel : public Model {
- public:
- /**
- * @brief Runs inference for RNNoise model.
- *
- * Call CopyGruStates so GRU state outputs are copied to GRU state inputs before the inference run.
- * Run ResetGruState() method to set states to zero before starting processing logically related data.
- * @return True if inference succeeded, False - otherwise
- */
- bool RunInference() override;
-
- /**
- * @brief Sets GRU input states to zeros.
- * Call this method before starting processing the new sequence of logically related data.
- */
- void ResetGruState();
-
- /**
- * @brief Copy current GRU output states to input states.
- * Call this method before starting processing the next sequence of logically related data.
- */
- bool CopyGruStates();
-
- /* Which index of model outputs does the main output (gains) come from. */
- const size_t m_indexForModelOutput = 1;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- /*
- Each inference after the first needs to copy 3 GRU states from a output index to input index (model dependent):
- 0 -> 3, 2 -> 2, 3 -> 1
- */
- const std::vector<std::pair<size_t, size_t>> m_gruStateMap = {{0,3}, {2, 2}, {3, 1}};
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 15;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* RNNOISE_MODEL_HPP */ \ No newline at end of file
diff --git a/source/use_case/noise_reduction/include/RNNoiseProcessing.hpp b/source/use_case/noise_reduction/include/RNNoiseProcessing.hpp
deleted file mode 100644
index 15e62d9..0000000
--- a/source/use_case/noise_reduction/include/RNNoiseProcessing.hpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef RNNOISE_PROCESSING_HPP
-#define RNNOISE_PROCESSING_HPP
-
-#include "BaseProcessing.hpp"
-#include "Model.hpp"
-#include "RNNoiseFeatureProcessor.hpp"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for Noise Reduction use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class RNNoisePreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in/out] featureProcessor RNNoise specific feature extractor object.
- * @param[in/out] frameFeatures RNNoise specific features shared between pre & post-processing.
- *
- **/
- explicit RNNoisePreProcess(TfLiteTensor* inputTensor,
- std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
- std::shared_ptr<rnn::FrameFeatures> frameFeatures);
-
- /**
- * @brief Should perform pre-processing of 'raw' input audio data and load it into
- * TFLite Micro input tensors ready for inference
- * @param[in] input Pointer to the data that pre-processing will work on.
- * @param[in] inputSize Size of the input data.
- * @return true if successful, false otherwise.
- **/
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- private:
- TfLiteTensor* m_inputTensor; /* Model input tensor. */
- std::shared_ptr<rnn::RNNoiseFeatureProcessor> m_featureProcessor; /* RNNoise feature processor shared between pre & post-processing. */
- std::shared_ptr<rnn::FrameFeatures> m_frameFeatures; /* RNNoise features shared between pre & post-processing. */
- rnn::vec1D32F m_audioFrame; /* Audio frame cast to FP32 */
-
- /**
- * @brief Quantize the given features and populate the input Tensor.
- * @param[in] inputFeatures Vector of floating point features to quantize.
- * @param[in] quantScale Quantization scale for the inputTensor.
- * @param[in] quantOffset Quantization offset for the inputTensor.
- * @param[in,out] inputTensor TFLite micro tensor to populate.
- **/
- static void QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
- float quantScale, int quantOffset,
- TfLiteTensor* inputTensor);
- };
-
- /**
- * @brief Post-processing class for Noise Reduction use case.
- * Implements methods declared by BasePostProcess and anything else needed
- * to populate result vector.
- */
- class RNNoisePostProcess : public BasePostProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[out] denoisedAudioFrame Vector to store the final denoised audio frame.
- * @param[in/out] featureProcessor RNNoise specific feature extractor object.
- * @param[in/out] frameFeatures RNNoise specific features shared between pre & post-processing.
- **/
- RNNoisePostProcess(TfLiteTensor* outputTensor,
- std::vector<int16_t>& denoisedAudioFrame,
- std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
- std::shared_ptr<rnn::FrameFeatures> frameFeatures);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
-
- private:
- TfLiteTensor* m_outputTensor; /* Model output tensor. */
- std::vector<int16_t>& m_denoisedAudioFrame; /* Vector to store the final denoised frame. */
- rnn::vec1D32F m_denoisedAudioFrameFloat; /* Internal vector to store the final denoised frame (FP32). */
- std::shared_ptr<rnn::RNNoiseFeatureProcessor> m_featureProcessor; /* RNNoise feature processor shared between pre & post-processing. */
- std::shared_ptr<rnn::FrameFeatures> m_frameFeatures; /* RNNoise features shared between pre & post-processing. */
- std::vector<float> m_modelOutputFloat; /* Internal vector to store de-quantized model output. */
-
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* RNNOISE_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/use_case/noise_reduction/src/MainLoop.cc b/source/use_case/noise_reduction/src/MainLoop.cc
index fd72127..4c74a48 100644
--- a/source/use_case/noise_reduction/src/MainLoop.cc
+++ b/source/use_case/noise_reduction/src/MainLoop.cc
@@ -18,7 +18,17 @@
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
#include "RNNoiseModel.hpp" /* Model class for running inference. */
#include "InputFiles.hpp" /* For input audio clips. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+ namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ } /* namespace app */
+} /* namespace arm */
+
+extern uint8_t* GetModelPointer();
+extern size_t GetModelLen();
enum opcodes
{
@@ -62,10 +72,22 @@ void main_loop()
constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
/* Load the model. */
- if (!model.Init()) {
+ if (!model.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ GetModelPointer(),
+ GetModelLen())) {
printf_err("Failed to initialise model\n");
return;
}
+
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
@@ -124,4 +146,4 @@ void main_loop()
}
} while (executionSuccessful && bUseMenu);
info("Main loop terminated.\n");
-} \ No newline at end of file
+}
diff --git a/source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc b/source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc
deleted file mode 100644
index 036894c..0000000
--- a/source/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc
+++ /dev/null
@@ -1,892 +0,0 @@
-/*
- * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "RNNoiseFeatureProcessor.hpp"
-#include "log_macros.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstring>
-
-namespace arm {
-namespace app {
-namespace rnn {
-
-#define VERIFY(x) \
-do { \
- if (!(x)) { \
- printf_err("Assert failed:" #x "\n"); \
- exit(1); \
- } \
-} while(0)
-
-RNNoiseFeatureProcessor::RNNoiseFeatureProcessor() :
- m_halfWindow(FRAME_SIZE, 0),
- m_dctTable(NB_BANDS * NB_BANDS),
- m_analysisMem(FRAME_SIZE, 0),
- m_cepstralMem(CEPS_MEM, vec1D32F(NB_BANDS, 0)),
- m_memId{0},
- m_synthesisMem(FRAME_SIZE, 0),
- m_pitchBuf(PITCH_BUF_SIZE, 0),
- m_lastGain{0.0},
- m_lastPeriod{0},
- m_memHpX{},
- m_lastGVec(NB_BANDS, 0)
-{
- constexpr uint32_t numFFt = 2 * FRAME_SIZE;
- static_assert(numFFt != 0, "Num FFT can't be 0");
-
- math::MathUtils::FftInitF32(numFFt, this->m_fftInstReal, FftType::real);
- math::MathUtils::FftInitF32(numFFt, this->m_fftInstCmplx, FftType::complex);
- this->InitTables();
-}
-
-void RNNoiseFeatureProcessor::PreprocessFrame(const float* audioData,
- const size_t audioLen,
- FrameFeatures& features)
-{
- /* Note audioWindow is modified in place */
- const arrHp aHp {-1.99599, 0.99600 };
- const arrHp bHp {-2.00000, 1.00000 };
-
- vec1D32F audioWindow{audioData, audioData + audioLen};
-
- this->BiQuad(bHp, aHp, this->m_memHpX, audioWindow);
- this->ComputeFrameFeatures(audioWindow, features);
-}
-
-void RNNoiseFeatureProcessor::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame)
-{
- std::vector<float> outputBands = modelOutput;
- std::vector<float> gain(FREQ_SIZE, 0);
-
- if (!features.m_silence) {
- PitchFilter(features, outputBands);
- for (size_t i = 0; i < NB_BANDS; i++) {
- float alpha = .6f;
- outputBands[i] = std::max(outputBands[i], alpha * m_lastGVec[i]);
- m_lastGVec[i] = outputBands[i];
- }
- InterpBandGain(gain, outputBands);
- for (size_t i = 0; i < FREQ_SIZE; i++) {
- features.m_fftX[2 * i] *= gain[i]; /* Real. */
- features.m_fftX[2 * i + 1] *= gain[i]; /*imaginary. */
-
- }
-
- }
-
- FrameSynthesis(outFrame, features.m_fftX);
-}
-
-void RNNoiseFeatureProcessor::InitTables()
-{
- constexpr float pi = M_PI;
- constexpr float halfPi = M_PI / 2;
- constexpr float halfPiOverFrameSz = halfPi/FRAME_SIZE;
-
- for (uint32_t i = 0; i < FRAME_SIZE; i++) {
- const float sinVal = math::MathUtils::SineF32(halfPiOverFrameSz * (i + 0.5f));
- m_halfWindow[i] = math::MathUtils::SineF32(halfPi * sinVal * sinVal);
- }
-
- for (uint32_t i = 0; i < NB_BANDS; i++) {
- for (uint32_t j = 0; j < NB_BANDS; j++) {
- m_dctTable[i * NB_BANDS + j] = math::MathUtils::CosineF32((i + 0.5f) * j * pi / NB_BANDS);
- }
- m_dctTable[i * NB_BANDS] *= math::MathUtils::SqrtF32(0.5f);
- }
-}
-
-void RNNoiseFeatureProcessor::BiQuad(
- const arrHp& bHp,
- const arrHp& aHp,
- arrHp& memHpX,
- vec1D32F& audioWindow)
-{
- for (float& audioElement : audioWindow) {
- const auto xi = audioElement;
- const auto yi = audioElement + memHpX[0];
- memHpX[0] = memHpX[1] + (bHp[0] * xi - aHp[0] * yi);
- memHpX[1] = (bHp[1] * xi - aHp[1] * yi);
- audioElement = yi;
- }
-}
-
-void RNNoiseFeatureProcessor::ComputeFrameFeatures(vec1D32F& audioWindow,
- FrameFeatures& features)
-{
- this->FrameAnalysis(audioWindow,
- features.m_fftX,
- features.m_Ex,
- this->m_analysisMem);
-
- float energy = 0.0;
-
- vec1D32F Ly(NB_BANDS, 0);
- vec1D32F p(WINDOW_SIZE, 0);
- vec1D32F pitchBuf(PITCH_BUF_SIZE >> 1, 0);
-
- VERIFY(PITCH_BUF_SIZE >= this->m_pitchBuf.size());
- std::copy_n(this->m_pitchBuf.begin() + FRAME_SIZE,
- PITCH_BUF_SIZE - FRAME_SIZE,
- this->m_pitchBuf.begin());
-
- VERIFY(FRAME_SIZE <= audioWindow.size() && PITCH_BUF_SIZE > FRAME_SIZE);
- std::copy_n(audioWindow.begin(),
- FRAME_SIZE,
- this->m_pitchBuf.begin() + PITCH_BUF_SIZE - FRAME_SIZE);
-
- this->PitchDownsample(pitchBuf, PITCH_BUF_SIZE);
-
- VERIFY(pitchBuf.size() > PITCH_MAX_PERIOD/2);
- vec1D32F xLp(pitchBuf.size() - PITCH_MAX_PERIOD/2, 0);
- std::copy_n(pitchBuf.begin() + PITCH_MAX_PERIOD/2, xLp.size(), xLp.begin());
-
- int pitchIdx = this->PitchSearch(xLp, pitchBuf,
- PITCH_FRAME_SIZE, (PITCH_MAX_PERIOD - (3*PITCH_MIN_PERIOD)));
-
- pitchIdx = this->RemoveDoubling(
- pitchBuf,
- PITCH_MAX_PERIOD,
- PITCH_MIN_PERIOD,
- PITCH_FRAME_SIZE,
- PITCH_MAX_PERIOD - pitchIdx);
-
- size_t stIdx = PITCH_BUF_SIZE - WINDOW_SIZE - pitchIdx;
- VERIFY((static_cast<int>(PITCH_BUF_SIZE) - static_cast<int>(WINDOW_SIZE) - pitchIdx) >= 0);
- std::copy_n(this->m_pitchBuf.begin() + stIdx, WINDOW_SIZE, p.begin());
-
- this->ApplyWindow(p);
- this->ForwardTransform(p, features.m_fftP);
- this->ComputeBandEnergy(features.m_fftP, features.m_Ep);
- this->ComputeBandCorr(features.m_fftX, features.m_fftP, features.m_Exp);
-
- for (uint32_t i = 0 ; i < NB_BANDS; ++i) {
- features.m_Exp[i] /= math::MathUtils::SqrtF32(
- 0.001f + features.m_Ex[i] * features.m_Ep[i]);
- }
-
- vec1D32F dctVec(NB_BANDS, 0);
- this->DCT(features.m_Exp, dctVec);
-
- features.m_featuresVec = vec1D32F (NB_FEATURES, 0);
- for (uint32_t i = 0; i < NB_DELTA_CEPS; ++i) {
- features.m_featuresVec[NB_BANDS + 2*NB_DELTA_CEPS + i] = dctVec[i];
- }
-
- features.m_featuresVec[NB_BANDS + 2*NB_DELTA_CEPS] -= 1.3;
- features.m_featuresVec[NB_BANDS + 2*NB_DELTA_CEPS + 1] -= 0.9;
- features.m_featuresVec[NB_BANDS + 3*NB_DELTA_CEPS] = 0.01 * (static_cast<int>(pitchIdx) - 300);
-
- float logMax = -2.f;
- float follow = -2.f;
- for (uint32_t i = 0; i < NB_BANDS; ++i) {
- Ly[i] = log10f(1e-2f + features.m_Ex[i]);
- Ly[i] = std::max<float>(logMax - 7, std::max<float>(follow - 1.5, Ly[i]));
- logMax = std::max<float>(logMax, Ly[i]);
- follow = std::max<float>(follow - 1.5, Ly[i]);
- energy += features.m_Ex[i];
- }
-
- /* If there's no audio avoid messing up the state. */
- features.m_silence = true;
- if (energy < 0.04) {
- return;
- } else {
- features.m_silence = false;
- }
-
- this->DCT(Ly, features.m_featuresVec);
- features.m_featuresVec[0] -= 12.0;
- features.m_featuresVec[1] -= 4.0;
-
- VERIFY(CEPS_MEM > 2);
- uint32_t stIdx1 = this->m_memId < 1 ? CEPS_MEM + this->m_memId - 1 : this->m_memId - 1;
- uint32_t stIdx2 = this->m_memId < 2 ? CEPS_MEM + this->m_memId - 2 : this->m_memId - 2;
- VERIFY(stIdx1 < this->m_cepstralMem.size());
- VERIFY(stIdx2 < this->m_cepstralMem.size());
- auto ceps1 = this->m_cepstralMem[stIdx1];
- auto ceps2 = this->m_cepstralMem[stIdx2];
-
- /* Ceps 0 */
- for (uint32_t i = 0; i < NB_BANDS; ++i) {
- this->m_cepstralMem[this->m_memId][i] = features.m_featuresVec[i];
- }
-
- for (uint32_t i = 0; i < NB_DELTA_CEPS; ++i) {
- features.m_featuresVec[i] = this->m_cepstralMem[this->m_memId][i] + ceps1[i] + ceps2[i];
- features.m_featuresVec[NB_BANDS + i] = this->m_cepstralMem[this->m_memId][i] - ceps2[i];
- features.m_featuresVec[NB_BANDS + NB_DELTA_CEPS + i] =
- this->m_cepstralMem[this->m_memId][i] - 2 * ceps1[i] + ceps2[i];
- }
-
- /* Spectral variability features. */
- this->m_memId += 1;
- if (this->m_memId == CEPS_MEM) {
- this->m_memId = 0;
- }
-
- float specVariability = 0.f;
-
- VERIFY(this->m_cepstralMem.size() >= CEPS_MEM);
- for (size_t i = 0; i < CEPS_MEM; ++i) {
- float minDist = 1e15;
- for (size_t j = 0; j < CEPS_MEM; ++j) {
- float dist = 0.f;
- for (size_t k = 0; k < NB_BANDS; ++k) {
- VERIFY(this->m_cepstralMem[i].size() >= NB_BANDS);
- auto tmp = this->m_cepstralMem[i][k] - this->m_cepstralMem[j][k];
- dist += tmp * tmp;
- }
-
- if (j != i) {
- minDist = std::min<float>(minDist, dist);
- }
- }
- specVariability += minDist;
- }
-
- VERIFY(features.m_featuresVec.size() >= NB_BANDS + 3 * NB_DELTA_CEPS + 1);
- features.m_featuresVec[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = specVariability / CEPS_MEM - 2.1;
-}
-
-void RNNoiseFeatureProcessor::FrameAnalysis(
- const vec1D32F& audioWindow,
- vec1D32F& fft,
- vec1D32F& energy,
- vec1D32F& analysisMem)
-{
- vec1D32F x(WINDOW_SIZE, 0);
-
- /* Move old audio down and populate end with latest audio window. */
- VERIFY(x.size() >= FRAME_SIZE && analysisMem.size() >= FRAME_SIZE);
- VERIFY(audioWindow.size() >= FRAME_SIZE);
-
- std::copy_n(analysisMem.begin(), FRAME_SIZE, x.begin());
- std::copy_n(audioWindow.begin(), x.size() - FRAME_SIZE, x.begin() + FRAME_SIZE);
- std::copy_n(audioWindow.begin(), FRAME_SIZE, analysisMem.begin());
-
- this->ApplyWindow(x);
-
- /* Calculate FFT. */
- ForwardTransform(x, fft);
-
- /* Compute band energy. */
- ComputeBandEnergy(fft, energy);
-}
-
-void RNNoiseFeatureProcessor::ApplyWindow(vec1D32F& x)
-{
- if (WINDOW_SIZE != x.size()) {
- printf_err("Invalid size for vector to be windowed\n");
- return;
- }
-
- VERIFY(this->m_halfWindow.size() >= FRAME_SIZE);
-
- /* Multiply input by sinusoidal function. */
- for (size_t i = 0; i < FRAME_SIZE; i++) {
- x[i] *= this->m_halfWindow[i];
- x[WINDOW_SIZE - 1 - i] *= this->m_halfWindow[i];
- }
-}
-
-void RNNoiseFeatureProcessor::ForwardTransform(
- vec1D32F& x,
- vec1D32F& fft)
-{
- /* The input vector can be modified by the fft function. */
- fft.reserve(x.size() + 2);
- fft.resize(x.size() + 2, 0);
- math::MathUtils::FftF32(x, fft, this->m_fftInstReal);
-
- /* Normalise. */
- for (auto& f : fft) {
- f /= this->m_fftInstReal.m_fftLen;
- }
-
- /* Place the last freq element correctly */
- fft[fft.size()-2] = fft[1];
- fft[1] = 0;
-
- /* NOTE: We don't truncate out FFT vector as it already contains only the
- * first half of the FFT's. The conjugates are not present. */
-}
-
-void RNNoiseFeatureProcessor::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE)
-{
- bandE = vec1D32F(NB_BANDS, 0);
-
- VERIFY(this->m_eband5ms.size() >= NB_BANDS);
- for (uint32_t i = 0; i < NB_BANDS - 1; i++) {
- const auto bandSize = (this->m_eband5ms[i + 1] - this->m_eband5ms[i])
- << FRAME_SIZE_SHIFT;
-
- for (uint32_t j = 0; j < bandSize; j++) {
- const auto frac = static_cast<float>(j) / bandSize;
- const auto idx = (this->m_eband5ms[i] << FRAME_SIZE_SHIFT) + j;
-
- auto tmp = fftX[2 * idx] * fftX[2 * idx]; /* Real part */
- tmp += fftX[2 * idx + 1] * fftX[2 * idx + 1]; /* Imaginary part */
-
- bandE[i] += (1 - frac) * tmp;
- bandE[i + 1] += frac * tmp;
- }
- }
- bandE[0] *= 2;
- bandE[NB_BANDS - 1] *= 2;
-}
-
-void RNNoiseFeatureProcessor::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC)
-{
- bandC = vec1D32F(NB_BANDS, 0);
- VERIFY(this->m_eband5ms.size() >= NB_BANDS);
-
- for (uint32_t i = 0; i < NB_BANDS - 1; i++) {
- const auto bandSize = (this->m_eband5ms[i + 1] - this->m_eband5ms[i]) << FRAME_SIZE_SHIFT;
-
- for (uint32_t j = 0; j < bandSize; j++) {
- const auto frac = static_cast<float>(j) / bandSize;
- const auto idx = (this->m_eband5ms[i] << FRAME_SIZE_SHIFT) + j;
-
- auto tmp = X[2 * idx] * P[2 * idx]; /* Real part */
- tmp += X[2 * idx + 1] * P[2 * idx + 1]; /* Imaginary part */
-
- bandC[i] += (1 - frac) * tmp;
- bandC[i + 1] += frac * tmp;
- }
- }
- bandC[0] *= 2;
- bandC[NB_BANDS - 1] *= 2;
-}
-
-void RNNoiseFeatureProcessor::DCT(vec1D32F& input, vec1D32F& output)
-{
- VERIFY(this->m_dctTable.size() >= NB_BANDS * NB_BANDS);
- for (uint32_t i = 0; i < NB_BANDS; ++i) {
- float sum = 0;
-
- for (uint32_t j = 0, k = 0; j < NB_BANDS; ++j, k += NB_BANDS) {
- sum += input[j] * this->m_dctTable[k + i];
- }
- output[i] = sum * math::MathUtils::SqrtF32(2.0/22);
- }
-}
-
-void RNNoiseFeatureProcessor::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) {
- for (size_t i = 1; i < (pitchBufSz >> 1); ++i) {
- pitchBuf[i] = 0.5 * (
- 0.5 * (this->m_pitchBuf[2 * i - 1] + this->m_pitchBuf[2 * i + 1])
- + this->m_pitchBuf[2 * i]);
- }
-
- pitchBuf[0] = 0.5*(0.5*(this->m_pitchBuf[1]) + this->m_pitchBuf[0]);
-
- vec1D32F ac(5, 0);
- size_t numLags = 4;
-
- this->AutoCorr(pitchBuf, ac, numLags, pitchBufSz >> 1);
-
- /* Noise floor -40db */
- ac[0] *= 1.0001;
-
- /* Lag windowing. */
- for (size_t i = 1; i < numLags + 1; ++i) {
- ac[i] -= ac[i] * (0.008 * i) * (0.008 * i);
- }
-
- vec1D32F lpc(numLags, 0);
- this->LPC(ac, numLags, lpc);
-
- float tmp = 1.0;
- for (size_t i = 0; i < numLags; ++i) {
- tmp = 0.9f * tmp;
- lpc[i] = lpc[i] * tmp;
- }
-
- vec1D32F lpc2(numLags + 1, 0);
- float c1 = 0.8;
-
- /* Add a zero. */
- lpc2[0] = lpc[0] + 0.8;
- lpc2[1] = lpc[1] + (c1 * lpc[0]);
- lpc2[2] = lpc[2] + (c1 * lpc[1]);
- lpc2[3] = lpc[3] + (c1 * lpc[2]);
- lpc2[4] = (c1 * lpc[3]);
-
- this->Fir5(lpc2, pitchBufSz >> 1, pitchBuf);
-}
-
-int RNNoiseFeatureProcessor::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch) {
- uint32_t lag = len + maxPitch;
- vec1D32F xLp4(len >> 2, 0);
- vec1D32F yLp4(lag >> 2, 0);
- vec1D32F xCorr(maxPitch >> 1, 0);
-
- /* Downsample by 2 again. */
- for (size_t j = 0; j < (len >> 2); ++j) {
- xLp4[j] = xLp[2*j];
- }
- for (size_t j = 0; j < (lag >> 2); ++j) {
- yLp4[j] = y[2*j];
- }
-
- this->PitchXCorr(xLp4, yLp4, xCorr, len >> 2, maxPitch >> 2);
-
- /* Coarse search with 4x decimation. */
- arrHp bestPitch = this->FindBestPitch(xCorr, yLp4, len >> 2, maxPitch >> 2);
-
- /* Finer search with 2x decimation. */
- const int maxIdx = (maxPitch >> 1);
- for (int i = 0; i < maxIdx; ++i) {
- xCorr[i] = 0;
- if (std::abs(i - 2*bestPitch[0]) > 2 and std::abs(i - 2*bestPitch[1]) > 2) {
- continue;
- }
- float sum = 0;
- for (size_t j = 0; j < len >> 1; ++j) {
- sum += xLp[j] * y[i+j];
- }
-
- xCorr[i] = std::max(-1.0f, sum);
- }
-
- bestPitch = this->FindBestPitch(xCorr, y, len >> 1, maxPitch >> 1);
-
- int offset;
- /* Refine by pseudo-interpolation. */
- if ( 0 < bestPitch[0] && bestPitch[0] < ((maxPitch >> 1) - 1)) {
- float a = xCorr[bestPitch[0] - 1];
- float b = xCorr[bestPitch[0]];
- float c = xCorr[bestPitch[0] + 1];
-
- if ( (c-a) > 0.7*(b-a) ) {
- offset = 1;
- } else if ( (a-c) > 0.7*(b-c) ) {
- offset = -1;
- } else {
- offset = 0;
- }
- } else {
- offset = 0;
- }
-
- return 2*bestPitch[0] - offset;
-}
-
-arrHp RNNoiseFeatureProcessor::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch)
-{
- float Syy = 1;
- arrHp bestNum {-1, -1};
- arrHp bestDen {0, 0};
- arrHp bestPitch {0, 1};
-
- for (size_t j = 0; j < len; ++j) {
- Syy += (y[j] * y[j]);
- }
-
- for (size_t i = 0; i < maxPitch; ++i ) {
- if (xCorr[i] > 0) {
- float xCorr16 = xCorr[i] * 1e-12f; /* Avoid problems when squaring. */
-
- float num = xCorr16 * xCorr16;
- if (num*bestDen[1] > bestNum[1]*Syy) {
- if (num*bestDen[0] > bestNum[0]*Syy) {
- bestNum[1] = bestNum[0];
- bestDen[1] = bestDen[0];
- bestPitch[1] = bestPitch[0];
- bestNum[0] = num;
- bestDen[0] = Syy;
- bestPitch[0] = i;
- } else {
- bestNum[1] = num;
- bestDen[1] = Syy;
- bestPitch[1] = i;
- }
- }
- }
-
- Syy += (y[i+len]*y[i+len]) - (y[i]*y[i]);
- Syy = std::max(1.0f, Syy);
- }
-
- return bestPitch;
-}
-
-int RNNoiseFeatureProcessor::RemoveDoubling(
- vec1D32F& pitchBuf,
- uint32_t maxPeriod,
- uint32_t minPeriod,
- uint32_t frameSize,
- size_t pitchIdx0_)
-{
- constexpr std::array<size_t, 16> secondCheck {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
- uint32_t minPeriod0 = minPeriod;
- float lastPeriod = static_cast<float>(this->m_lastPeriod)/2;
- float lastGain = static_cast<float>(this->m_lastGain);
-
- maxPeriod /= 2;
- minPeriod /= 2;
- pitchIdx0_ /= 2;
- frameSize /= 2;
- uint32_t xStart = maxPeriod;
-
- if (pitchIdx0_ >= maxPeriod) {
- pitchIdx0_ = maxPeriod - 1;
- }
-
- size_t pitchIdx = pitchIdx0_;
- const size_t pitchIdx0 = pitchIdx0_;
-
- float xx = 0;
- for ( size_t i = xStart; i < xStart+frameSize; ++i) {
- xx += (pitchBuf[i] * pitchBuf[i]);
- }
-
- float xy = 0;
- for ( size_t i = xStart; i < xStart+frameSize; ++i) {
- xy += (pitchBuf[i] * pitchBuf[i-pitchIdx0]);
- }
-
- vec1D32F yyLookup (maxPeriod+1, 0);
- yyLookup[0] = xx;
- float yy = xx;
-
- for ( size_t i = 1; i < yyLookup.size(); ++i) {
- yy = yy + (pitchBuf[xStart-i] * pitchBuf[xStart-i]) -
- (pitchBuf[xStart+frameSize-i] * pitchBuf[xStart+frameSize-i]);
- yyLookup[i] = std::max(0.0f, yy);
- }
-
- yy = yyLookup[pitchIdx0];
- float bestXy = xy;
- float bestYy = yy;
-
- float g = this->ComputePitchGain(xy, xx, yy);
- float g0 = g;
-
- /* Look for any pitch at pitchIndex/k. */
- for ( size_t k = 2; k < 16; ++k) {
- size_t pitchIdx1 = (2*pitchIdx0+k) / (2*k);
- if (pitchIdx1 < minPeriod) {
- break;
- }
-
- size_t pitchIdx1b;
- /* Look for another strong correlation at T1b. */
- if (k == 2) {
- if ((pitchIdx1 + pitchIdx0) > maxPeriod) {
- pitchIdx1b = pitchIdx0;
- } else {
- pitchIdx1b = pitchIdx0 + pitchIdx1;
- }
- } else {
- pitchIdx1b = (2*(secondCheck[k])*pitchIdx0 + k) / (2*k);
- }
-
- xy = 0;
- for ( size_t i = xStart; i < xStart+frameSize; ++i) {
- xy += (pitchBuf[i] * pitchBuf[i-pitchIdx1]);
- }
-
- float xy2 = 0;
- for ( size_t i = xStart; i < xStart+frameSize; ++i) {
- xy2 += (pitchBuf[i] * pitchBuf[i-pitchIdx1b]);
- }
- xy = 0.5f * (xy + xy2);
- VERIFY(pitchIdx1b < maxPeriod+1);
- yy = 0.5f * (yyLookup[pitchIdx1] + yyLookup[pitchIdx1b]);
-
- float g1 = this->ComputePitchGain(xy, xx, yy);
-
- float cont;
- if (std::abs(pitchIdx1-lastPeriod) <= 1) {
- cont = lastGain;
- } else if (std::abs(pitchIdx1-lastPeriod) <= 2 and 5*k*k < pitchIdx0) {
- cont = 0.5f*lastGain;
- } else {
- cont = 0.0f;
- }
-
- float thresh = std::max(0.3, 0.7*g0-cont);
-
- /* Bias against very high pitch (very short period) to avoid false-positives
- * due to short-term correlation */
- if (pitchIdx1 < 3*minPeriod) {
- thresh = std::max(0.4, 0.85*g0-cont);
- } else if (pitchIdx1 < 2*minPeriod) {
- thresh = std::max(0.5, 0.9*g0-cont);
- }
- if (g1 > thresh) {
- bestXy = xy;
- bestYy = yy;
- pitchIdx = pitchIdx1;
- g = g1;
- }
- }
-
- bestXy = std::max(0.0f, bestXy);
- float pg;
- if (bestYy <= bestXy) {
- pg = 1.0;
- } else {
- pg = bestXy/(bestYy+1);
- }
-
- std::array<float, 3> xCorr {0};
- for ( size_t k = 0; k < 3; ++k ) {
- for ( size_t i = xStart; i < xStart+frameSize; ++i) {
- xCorr[k] += (pitchBuf[i] * pitchBuf[i-(pitchIdx+k-1)]);
- }
- }
-
- size_t offset;
- if ((xCorr[2]-xCorr[0]) > 0.7*(xCorr[1]-xCorr[0])) {
- offset = 1;
- } else if ((xCorr[0]-xCorr[2]) > 0.7*(xCorr[1]-xCorr[2])) {
- offset = -1;
- } else {
- offset = 0;
- }
-
- if (pg > g) {
- pg = g;
- }
-
- pitchIdx0_ = 2*pitchIdx + offset;
-
- if (pitchIdx0_ < minPeriod0) {
- pitchIdx0_ = minPeriod0;
- }
-
- this->m_lastPeriod = pitchIdx0_;
- this->m_lastGain = pg;
-
- return this->m_lastPeriod;
-}
-
-float RNNoiseFeatureProcessor::ComputePitchGain(float xy, float xx, float yy)
-{
- return xy / math::MathUtils::SqrtF32(1+xx*yy);
-}
-
-void RNNoiseFeatureProcessor::AutoCorr(
- const vec1D32F& x,
- vec1D32F& ac,
- size_t lag,
- size_t n)
-{
- if (n < lag) {
- printf_err("Invalid parameters for AutoCorr\n");
- return;
- }
-
- auto fastN = n - lag;
-
- /* Auto-correlation - can be done by PlatformMath functions */
- this->PitchXCorr(x, x, ac, fastN, lag + 1);
-
- /* Modify auto-correlation by summing with auto-correlation for different lags. */
- for (size_t k = 0; k < lag + 1; k++) {
- float d = 0;
- for (size_t i = k + fastN; i < n; i++) {
- d += x[i] * x[i - k];
- }
- ac[k] += d;
- }
-}
-
-
-void RNNoiseFeatureProcessor::PitchXCorr(
- const vec1D32F& x,
- const vec1D32F& y,
- vec1D32F& xCorr,
- size_t len,
- size_t maxPitch)
-{
- for (size_t i = 0; i < maxPitch; i++) {
- float sum = 0;
- for (size_t j = 0; j < len; j++) {
- sum += x[j] * y[i + j];
- }
- xCorr[i] = sum;
- }
-}
-
-/* Linear predictor coefficients */
-void RNNoiseFeatureProcessor::LPC(
- const vec1D32F& correlation,
- int32_t p,
- vec1D32F& lpc)
-{
- auto error = correlation[0];
-
- if (error != 0) {
- for (int i = 0; i < p; i++) {
-
- /* Sum up this iteration's reflection coefficient */
- float rr = 0;
- for (int j = 0; j < i; j++) {
- rr += lpc[j] * correlation[i - j];
- }
-
- rr += correlation[i + 1];
- auto r = -rr / error;
-
- /* Update LP coefficients and total error */
- lpc[i] = r;
- for (int j = 0; j < ((i + 1) >> 1); j++) {
- auto tmp1 = lpc[j];
- auto tmp2 = lpc[i - 1 - j];
- lpc[j] = tmp1 + (r * tmp2);
- lpc[i - 1 - j] = tmp2 + (r * tmp1);
- }
-
- error = error - (r * r * error);
-
- /* Bail out once we get 30dB gain */
- if (error < (0.001 * correlation[0])) {
- break;
- }
- }
- }
-}
-
-void RNNoiseFeatureProcessor::Fir5(
- const vec1D32F &num,
- uint32_t N,
- vec1D32F &x)
-{
- auto num0 = num[0];
- auto num1 = num[1];
- auto num2 = num[2];
- auto num3 = num[3];
- auto num4 = num[4];
- auto mem0 = 0;
- auto mem1 = 0;
- auto mem2 = 0;
- auto mem3 = 0;
- auto mem4 = 0;
- for (uint32_t i = 0; i < N; i++)
- {
- auto sum_ = x[i] + (num0 * mem0) + (num1 * mem1) +
- (num2 * mem2) + (num3 * mem3) + (num4 * mem4);
- mem4 = mem3;
- mem3 = mem2;
- mem2 = mem1;
- mem1 = mem0;
- mem0 = x[i];
- x[i] = sum_;
- }
-}
-
-void RNNoiseFeatureProcessor::PitchFilter(FrameFeatures &features, vec1D32F &gain) {
- std::vector<float> r(NB_BANDS, 0);
- std::vector<float> rf(FREQ_SIZE, 0);
- std::vector<float> newE(NB_BANDS);
-
- for (size_t i = 0; i < NB_BANDS; i++) {
- if (features.m_Exp[i] > gain[i]) {
- r[i] = 1;
- } else {
-
-
- r[i] = std::pow(features.m_Exp[i], 2) * (1 - std::pow(gain[i], 2)) /
- (.001 + std::pow(gain[i], 2) * (1 - std::pow(features.m_Exp[i], 2)));
- }
-
-
- r[i] = math::MathUtils::SqrtF32(std::min(1.0f, std::max(0.0f, r[i])));
- r[i] *= math::MathUtils::SqrtF32(features.m_Ex[i] / (1e-8f + features.m_Ep[i]));
- }
-
- InterpBandGain(rf, r);
- for (size_t i = 0; i < FREQ_SIZE - 1; i++) {
- features.m_fftX[2 * i] += rf[i] * features.m_fftP[2 * i]; /* Real. */
- features.m_fftX[2 * i + 1] += rf[i] * features.m_fftP[2 * i + 1]; /* Imaginary. */
-
- }
- ComputeBandEnergy(features.m_fftX, newE);
- std::vector<float> norm(NB_BANDS);
- std::vector<float> normf(FRAME_SIZE, 0);
- for (size_t i = 0; i < NB_BANDS; i++) {
- norm[i] = math::MathUtils::SqrtF32(features.m_Ex[i] / (1e-8f + newE[i]));
- }
-
- InterpBandGain(normf, norm);
- for (size_t i = 0; i < FREQ_SIZE - 1; i++) {
- features.m_fftX[2 * i] *= normf[i]; /* Real. */
- features.m_fftX[2 * i + 1] *= normf[i]; /* Imaginary. */
-
- }
-}
-
-void RNNoiseFeatureProcessor::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) {
- std::vector<float> x(WINDOW_SIZE, 0);
- InverseTransform(x, fftY);
- ApplyWindow(x);
- for (size_t i = 0; i < FRAME_SIZE; i++) {
- outFrame[i] = x[i] + m_synthesisMem[i];
- }
- memcpy((m_synthesisMem.data()), &x[FRAME_SIZE], FRAME_SIZE*sizeof(float));
-}
-
-void RNNoiseFeatureProcessor::InterpBandGain(vec1D32F& g, vec1D32F& bandE) {
- for (size_t i = 0; i < NB_BANDS - 1; i++) {
- int bandSize = (m_eband5ms[i + 1] - m_eband5ms[i]) << FRAME_SIZE_SHIFT;
- for (int j = 0; j < bandSize; j++) {
- float frac = static_cast<float>(j) / bandSize;
- g[(m_eband5ms[i] << FRAME_SIZE_SHIFT) + j] = (1 - frac) * bandE[i] + frac * bandE[i + 1];
- }
- }
-}
-
-void RNNoiseFeatureProcessor::InverseTransform(vec1D32F& out, vec1D32F& fftXIn) {
-
- std::vector<float> x(WINDOW_SIZE * 2); /* This is complex. */
- vec1D32F newFFT; /* This is complex. */
-
- size_t i;
- for (i = 0; i < FREQ_SIZE * 2; i++) {
- x[i] = fftXIn[i];
- }
- for (i = FREQ_SIZE; i < WINDOW_SIZE; i++) {
- x[2 * i] = x[2 * (WINDOW_SIZE - i)]; /* Real. */
- x[2 * i + 1] = -x[2 * (WINDOW_SIZE - i) + 1]; /* Imaginary. */
- }
-
- constexpr uint32_t numFFt = 2 * FRAME_SIZE;
- static_assert(numFFt != 0, "numFFt cannot be 0!");
-
- vec1D32F fftOut = vec1D32F(x.size(), 0);
- math::MathUtils::FftF32(x,fftOut, m_fftInstCmplx);
-
- /* Normalize. */
- for (auto &f: fftOut) {
- f /= numFFt;
- }
-
- out[0] = WINDOW_SIZE * fftOut[0]; /* Real. */
- for (i = 1; i < WINDOW_SIZE; i++) {
- out[i] = WINDOW_SIZE * fftOut[(WINDOW_SIZE * 2) - (2 * i)]; /* Real. */
- }
-}
-
-
-} /* namespace rnn */
-} /* namespace app */
-} /* namspace arm */
diff --git a/source/use_case/noise_reduction/src/RNNoiseModel.cc b/source/use_case/noise_reduction/src/RNNoiseModel.cc
deleted file mode 100644
index 244fa1a..0000000
--- a/source/use_case/noise_reduction/src/RNNoiseModel.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "RNNoiseModel.hpp"
-#include "log_macros.h"
-
-const tflite::MicroOpResolver& arm::app::RNNoiseModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::RNNoiseModel::EnlistOperations()
-{
- this->m_opResolver.AddUnpack();
- this->m_opResolver.AddFullyConnected();
- this->m_opResolver.AddSplit();
- this->m_opResolver.AddSplitV();
- this->m_opResolver.AddAdd();
- this->m_opResolver.AddLogistic();
- this->m_opResolver.AddMul();
- this->m_opResolver.AddSub();
- this->m_opResolver.AddTanh();
- this->m_opResolver.AddPack();
- this->m_opResolver.AddReshape();
- this->m_opResolver.AddQuantize();
- this->m_opResolver.AddConcatenation();
- this->m_opResolver.AddRelu();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::RNNoiseModel::ModelPointer()
-{
- return GetModelPointer();
-}
-
-extern size_t GetModelLen();
-size_t arm::app::RNNoiseModel::ModelSize()
-{
- return GetModelLen();
-}
-
-bool arm::app::RNNoiseModel::RunInference()
-{
- return Model::RunInference();
-}
-
-void arm::app::RNNoiseModel::ResetGruState()
-{
- for (auto& stateMapping: this->m_gruStateMap) {
- TfLiteTensor* inputGruStateTensor = this->GetInputTensor(stateMapping.second);
- auto* inputGruState = tflite::GetTensorData<int8_t>(inputGruStateTensor);
- /* Initial value of states is 0, but this is affected by quantization zero point. */
- auto quantParams = arm::app::GetTensorQuantParams(inputGruStateTensor);
- memset(inputGruState, quantParams.offset, inputGruStateTensor->bytes);
- }
-}
-
-bool arm::app::RNNoiseModel::CopyGruStates()
-{
- std::vector<std::pair<size_t, std::vector<int8_t>>> tempOutGruStates;
- /* Saving output states before copying them to input states to avoid output states modification in the tensor.
- * tflu shares input and output tensors memory, thus writing to input tensor can change output tensor values. */
- for (auto& stateMapping: this->m_gruStateMap) {
- TfLiteTensor* outputGruStateTensor = this->GetOutputTensor(stateMapping.first);
- std::vector<int8_t> tempOutGruState(outputGruStateTensor->bytes);
- auto* outGruState = tflite::GetTensorData<int8_t>(outputGruStateTensor);
- memcpy(tempOutGruState.data(), outGruState, outputGruStateTensor->bytes);
- /* Index of the input tensor and the data to copy. */
- tempOutGruStates.emplace_back(stateMapping.second, std::move(tempOutGruState));
- }
- /* Updating input GRU states with saved GRU output states. */
- for (auto& stateMapping: tempOutGruStates) {
- auto outputGruStateTensorData = stateMapping.second;
- TfLiteTensor* inputGruStateTensor = this->GetInputTensor(stateMapping.first);
- if (outputGruStateTensorData.size() != inputGruStateTensor->bytes) {
- printf_err("Unexpected number of bytes for GRU state mapping. Input = %zuz, output = %zuz.\n",
- inputGruStateTensor->bytes,
- outputGruStateTensorData.size());
- return false;
- }
- auto* inputGruState = tflite::GetTensorData<int8_t>(inputGruStateTensor);
- auto* outGruState = outputGruStateTensorData.data();
- memcpy(inputGruState, outGruState, inputGruStateTensor->bytes);
- }
- return true;
-} \ No newline at end of file
diff --git a/source/use_case/noise_reduction/src/RNNoiseProcessing.cc b/source/use_case/noise_reduction/src/RNNoiseProcessing.cc
deleted file mode 100644
index f6a3ec4..0000000
--- a/source/use_case/noise_reduction/src/RNNoiseProcessing.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "RNNoiseProcessing.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- RNNoisePreProcess::RNNoisePreProcess(TfLiteTensor* inputTensor,
- std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor, std::shared_ptr<rnn::FrameFeatures> frameFeatures)
- : m_inputTensor{inputTensor},
- m_featureProcessor{featureProcessor},
- m_frameFeatures{frameFeatures}
- {}
-
- bool RNNoisePreProcess::DoPreProcess(const void* data, size_t inputSize)
- {
- if (data == nullptr) {
- printf_err("Data pointer is null");
- return false;
- }
-
- auto input = static_cast<const int16_t*>(data);
- this->m_audioFrame = rnn::vec1D32F(input, input + inputSize);
- m_featureProcessor->PreprocessFrame(this->m_audioFrame.data(), inputSize, *this->m_frameFeatures);
-
- QuantizeAndPopulateInput(this->m_frameFeatures->m_featuresVec,
- this->m_inputTensor->params.scale, this->m_inputTensor->params.zero_point,
- this->m_inputTensor);
-
- debug("Input tensor populated \n");
-
- return true;
- }
-
- void RNNoisePreProcess::QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
- const float quantScale, const int quantOffset,
- TfLiteTensor* inputTensor)
- {
- const float minVal = std::numeric_limits<int8_t>::min();
- const float maxVal = std::numeric_limits<int8_t>::max();
-
- auto* inputTensorData = tflite::GetTensorData<int8_t>(inputTensor);
-
- for (size_t i=0; i < inputFeatures.size(); ++i) {
- float quantValue = ((inputFeatures[i] / quantScale) + quantOffset);
- inputTensorData[i] = static_cast<int8_t>(std::min<float>(std::max<float>(quantValue, minVal), maxVal));
- }
- }
-
- RNNoisePostProcess::RNNoisePostProcess(TfLiteTensor* outputTensor,
- std::vector<int16_t>& denoisedAudioFrame,
- std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
- std::shared_ptr<rnn::FrameFeatures> frameFeatures)
- : m_outputTensor{outputTensor},
- m_denoisedAudioFrame{denoisedAudioFrame},
- m_featureProcessor{featureProcessor},
- m_frameFeatures{frameFeatures}
- {
- this->m_denoisedAudioFrameFloat.reserve(denoisedAudioFrame.size());
- this->m_modelOutputFloat.resize(outputTensor->bytes);
- }
-
- bool RNNoisePostProcess::DoPostProcess()
- {
- const auto* outputData = tflite::GetTensorData<int8_t>(this->m_outputTensor);
- auto outputQuantParams = GetTensorQuantParams(this->m_outputTensor);
-
- for (size_t i = 0; i < this->m_outputTensor->bytes; ++i) {
- this->m_modelOutputFloat[i] = (static_cast<float>(outputData[i]) - outputQuantParams.offset)
- * outputQuantParams.scale;
- }
-
- this->m_featureProcessor->PostProcessFrame(this->m_modelOutputFloat,
- *this->m_frameFeatures, this->m_denoisedAudioFrameFloat);
-
- for (size_t i = 0; i < this->m_denoisedAudioFrame.size(); ++i) {
- this->m_denoisedAudioFrame[i] = static_cast<int16_t>(
- std::roundf(this->m_denoisedAudioFrameFloat[i]));
- }
-
- return true;
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/noise_reduction/usecase.cmake b/source/use_case/noise_reduction/usecase.cmake
index 8dfde58..0cd0761 100644
--- a/source/use_case/noise_reduction/usecase.cmake
+++ b/source/use_case/noise_reduction/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "noise_reduction")
USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
0x00200000
diff --git a/source/use_case/object_detection/include/DetectionResult.hpp b/source/use_case/object_detection/include/DetectionResult.hpp
deleted file mode 100644
index aa74d90..0000000
--- a/source/use_case/object_detection/include/DetectionResult.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef DETECTION_RESULT_HPP
-#define DETECTION_RESULT_HPP
-
-
-namespace arm {
-namespace app {
-namespace object_detection {
-
- /**
- * @brief Class representing a single detection result.
- */
- class DetectionResult {
- public:
- /**
- * @brief Constructor
- * @param[in] normalisedVal Result normalized value
- * @param[in] x0 Top corner x starting point
- * @param[in] y0 Top corner y starting point
- * @param[in] w Detection result width
- * @param[in] h Detection result height
- **/
- DetectionResult(double normalisedVal,int x0,int y0, int w,int h) :
- m_normalisedVal(normalisedVal),
- m_x0(x0),
- m_y0(y0),
- m_w(w),
- m_h(h)
- {
- }
-
- DetectionResult() = default;
- ~DetectionResult() = default;
-
- double m_normalisedVal{0.0};
- int m_x0{0};
- int m_y0{0};
- int m_w{0};
- int m_h{0};
- };
-
-} /* namespace object_detection */
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* DETECTION_RESULT_HPP */
diff --git a/source/use_case/object_detection/include/DetectorPostProcessing.hpp b/source/use_case/object_detection/include/DetectorPostProcessing.hpp
deleted file mode 100644
index b3ddb2c..0000000
--- a/source/use_case/object_detection/include/DetectorPostProcessing.hpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef DETECTOR_POST_PROCESSING_HPP
-#define DETECTOR_POST_PROCESSING_HPP
-
-#include "UseCaseCommonUtils.hpp"
-#include "ImageUtils.hpp"
-#include "DetectionResult.hpp"
-#include "YoloFastestModel.hpp"
-#include "BaseProcessing.hpp"
-
-#include <forward_list>
-
-namespace arm {
-namespace app {
-
-namespace object_detection {
-
- struct Branch {
- int resolution;
- int numBox;
- const float* anchor;
- int8_t* modelOutput;
- float scale;
- int zeroPoint;
- size_t size;
- };
-
- struct Network {
- int inputWidth;
- int inputHeight;
- int numClasses;
- std::vector<Branch> branches;
- int topN;
- };
-
-} /* namespace object_detection */
-
- /**
- * @brief Post-processing class for Object Detection use case.
- * Implements methods declared by BasePostProcess and anything else needed
- * to populate result vector.
- */
- class DetectorPostProcess : public BasePostProcess {
- public:
- /**
- * @brief Constructor.
- * @param[in] outputTensor0 Pointer to the TFLite Micro output Tensor at index 0.
- * @param[in] outputTensor1 Pointer to the TFLite Micro output Tensor at index 1.
- * @param[out] results Vector of detected results.
- * @param[in] inputImgRows Number of rows in the input image.
- * @param[in] inputImgCols Number of columns in the input image.
- * @param[in] threshold Post-processing threshold.
- * @param[in] nms Non-maximum Suppression threshold.
- * @param[in] numClasses Number of classes.
- * @param[in] topN Top N for each class.
- **/
- explicit DetectorPostProcess(TfLiteTensor* outputTensor0,
- TfLiteTensor* outputTensor1,
- std::vector<object_detection::DetectionResult>& results,
- int inputImgRows,
- int inputImgCols,
- float threshold = 0.5f,
- float nms = 0.45f,
- int numClasses = 1,
- int topN = 0);
-
- /**
- * @brief Should perform YOLO post-processing of the result of inference then
- * populate Detection result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
-
- private:
- TfLiteTensor* m_outputTensor0; /* Output tensor index 0 */
- TfLiteTensor* m_outputTensor1; /* Output tensor index 1 */
- std::vector<object_detection::DetectionResult>& m_results; /* Single inference results. */
- int m_inputImgRows; /* Number of rows for model input. */
- int m_inputImgCols; /* Number of cols for model input. */
- float m_threshold; /* Post-processing threshold. */
- float m_nms; /* NMS threshold. */
- int m_numClasses; /* Number of classes. */
- int m_topN; /* TopN. */
- object_detection::Network m_net; /* YOLO network object. */
-
- /**
- * @brief Insert the given Detection in the list.
- * @param[in] detections List of detections.
- * @param[in] det Detection to be inserted.
- **/
- void InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det);
-
- /**
- * @brief Given a Network calculate the detection boxes.
- * @param[in] net Network.
- * @param[in] imageWidth Original image width.
- * @param[in] imageHeight Original image height.
- * @param[in] threshold Detections threshold.
- * @param[out] detections Detection boxes.
- **/
- void GetNetworkBoxes(object_detection::Network& net,
- int imageWidth,
- int imageHeight,
- float threshold,
- std::forward_list<image::Detection>& detections);
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* DETECTOR_POST_PROCESSING_HPP */
diff --git a/source/use_case/object_detection/include/DetectorPreProcessing.hpp b/source/use_case/object_detection/include/DetectorPreProcessing.hpp
deleted file mode 100644
index 4936048..0000000
--- a/source/use_case/object_detection/include/DetectorPreProcessing.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef DETECTOR_PRE_PROCESSING_HPP
-#define DETECTOR_PRE_PROCESSING_HPP
-
-#include "BaseProcessing.hpp"
-#include "Classifier.hpp"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for Object detection use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class DetectorPreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] rgb2Gray Convert image from 3 channel RGB to 1 channel grayscale.
- * @param[in] convertToInt8 Convert the image from uint8 to int8 range.
- **/
- explicit DetectorPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray, bool convertToInt8);
-
- /**
- * @brief Should perform pre-processing of 'raw' input image data and load it into
- * TFLite Micro input tensor ready for inference
- * @param[in] input Pointer to the data that pre-processing will work on.
- * @param[in] inputSize Size of the input data.
- * @return true if successful, false otherwise.
- **/
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- private:
- TfLiteTensor* m_inputTensor;
- bool m_rgb2Gray;
- bool m_convertToInt8;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* DETECTOR_PRE_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/use_case/object_detection/include/YoloFastestModel.hpp b/source/use_case/object_detection/include/YoloFastestModel.hpp
deleted file mode 100644
index 2986a58..0000000
--- a/source/use_case/object_detection/include/YoloFastestModel.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef YOLO_FASTEST_MODEL_HPP
-#define YOLO_FASTEST_MODEL_HPP
-
-#include "Model.hpp"
-
-extern const int originalImageSize;
-extern const int channelsImageDisplayed;
-extern const float anchor1[];
-extern const float anchor2[];
-
-namespace arm {
-namespace app {
-
- class YoloFastestModel : public Model {
-
- public:
- /* Indices for the expected model - based on input tensor shape */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_inputChannelsIdx = 3;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
-
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 8;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* YOLO_FASTEST_MODEL_HPP */
diff --git a/source/use_case/object_detection/src/DetectorPostProcessing.cc b/source/use_case/object_detection/src/DetectorPostProcessing.cc
deleted file mode 100644
index fb1606a..0000000
--- a/source/use_case/object_detection/src/DetectorPostProcessing.cc
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "DetectorPostProcessing.hpp"
-#include "PlatformMath.hpp"
-
-#include <cmath>
-
-namespace arm {
-namespace app {
-
- DetectorPostProcess::DetectorPostProcess(
- TfLiteTensor* modelOutput0,
- TfLiteTensor* modelOutput1,
- std::vector<object_detection::DetectionResult>& results,
- int inputImgRows,
- int inputImgCols,
- const float threshold,
- const float nms,
- int numClasses,
- int topN)
- : m_outputTensor0{modelOutput0},
- m_outputTensor1{modelOutput1},
- m_results{results},
- m_inputImgRows{inputImgRows},
- m_inputImgCols{inputImgCols},
- m_threshold(threshold),
- m_nms(nms),
- m_numClasses(numClasses),
- m_topN(topN)
-{
- /* Init PostProcessing */
- this->m_net =
- object_detection::Network {
- .inputWidth = inputImgCols,
- .inputHeight = inputImgRows,
- .numClasses = numClasses,
- .branches = {
- object_detection::Branch {
- .resolution = inputImgCols/32,
- .numBox = 3,
- .anchor = anchor1,
- .modelOutput = this->m_outputTensor0->data.int8,
- .scale = (static_cast<TfLiteAffineQuantization*>(
- this->m_outputTensor0->quantization.params))->scale->data[0],
- .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
- this->m_outputTensor0->quantization.params))->zero_point->data[0],
- .size = this->m_outputTensor0->bytes
- },
- object_detection::Branch {
- .resolution = inputImgCols/16,
- .numBox = 3,
- .anchor = anchor2,
- .modelOutput = this->m_outputTensor1->data.int8,
- .scale = (static_cast<TfLiteAffineQuantization*>(
- this->m_outputTensor1->quantization.params))->scale->data[0],
- .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
- this->m_outputTensor1->quantization.params))->zero_point->data[0],
- .size = this->m_outputTensor1->bytes
- }
- },
- .topN = m_topN
- };
- /* End init */
-}
-
-bool DetectorPostProcess::DoPostProcess()
-{
- /* Start postprocessing */
- int originalImageWidth = originalImageSize;
- int originalImageHeight = originalImageSize;
-
- std::forward_list<image::Detection> detections;
- GetNetworkBoxes(this->m_net, originalImageWidth, originalImageHeight, m_threshold, detections);
-
- /* Do nms */
- CalculateNMS(detections, this->m_net.numClasses, m_nms);
-
- for (auto& it: detections) {
- float xMin = it.bbox.x - it.bbox.w / 2.0f;
- float xMax = it.bbox.x + it.bbox.w / 2.0f;
- float yMin = it.bbox.y - it.bbox.h / 2.0f;
- float yMax = it.bbox.y + it.bbox.h / 2.0f;
-
- if (xMin < 0) {
- xMin = 0;
- }
- if (yMin < 0) {
- yMin = 0;
- }
- if (xMax > originalImageWidth) {
- xMax = originalImageWidth;
- }
- if (yMax > originalImageHeight) {
- yMax = originalImageHeight;
- }
-
- float boxX = xMin;
- float boxY = yMin;
- float boxWidth = xMax - xMin;
- float boxHeight = yMax - yMin;
-
- for (int j = 0; j < this->m_net.numClasses; ++j) {
- if (it.prob[j] > 0) {
-
- object_detection::DetectionResult tmpResult = {};
- tmpResult.m_normalisedVal = it.prob[j];
- tmpResult.m_x0 = boxX;
- tmpResult.m_y0 = boxY;
- tmpResult.m_w = boxWidth;
- tmpResult.m_h = boxHeight;
-
- this->m_results.push_back(tmpResult);
- }
- }
- }
- return true;
-}
-
-void DetectorPostProcess::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det)
-{
- std::forward_list<image::Detection>::iterator it;
- std::forward_list<image::Detection>::iterator last_it;
- for ( it = detections.begin(); it != detections.end(); ++it ) {
- if(it->objectness > det.objectness)
- break;
- last_it = it;
- }
- if(it != detections.begin()) {
- detections.emplace_after(last_it, det);
- detections.pop_front();
- }
-}
-
-void DetectorPostProcess::GetNetworkBoxes(
- object_detection::Network& net,
- int imageWidth,
- int imageHeight,
- float threshold,
- std::forward_list<image::Detection>& detections)
-{
- int numClasses = net.numClasses;
- int num = 0;
- auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) {
- return pa.objectness < pb.objectness;
- };
- for (size_t i = 0; i < net.branches.size(); ++i) {
- int height = net.branches[i].resolution;
- int width = net.branches[i].resolution;
- int channel = net.branches[i].numBox*(5+numClasses);
-
- for (int h = 0; h < net.branches[i].resolution; h++) {
- for (int w = 0; w < net.branches[i].resolution; w++) {
- for (int anc = 0; anc < net.branches[i].numBox; anc++) {
-
- /* Objectness score */
- int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
- float objectness = math::MathUtils::SigmoidF32(
- (static_cast<float>(net.branches[i].modelOutput[bbox_obj_offset])
- - net.branches[i].zeroPoint
- ) * net.branches[i].scale);
-
- if(objectness > threshold) {
- image::Detection det;
- det.objectness = objectness;
- /* Get bbox prediction data for each anchor, each feature point */
- int bbox_x_offset = bbox_obj_offset -4;
- int bbox_y_offset = bbox_x_offset + 1;
- int bbox_w_offset = bbox_x_offset + 2;
- int bbox_h_offset = bbox_x_offset + 3;
- int bbox_scores_offset = bbox_x_offset + 5;
-
- det.bbox.x = (static_cast<float>(net.branches[i].modelOutput[bbox_x_offset])
- - net.branches[i].zeroPoint) * net.branches[i].scale;
- det.bbox.y = (static_cast<float>(net.branches[i].modelOutput[bbox_y_offset])
- - net.branches[i].zeroPoint) * net.branches[i].scale;
- det.bbox.w = (static_cast<float>(net.branches[i].modelOutput[bbox_w_offset])
- - net.branches[i].zeroPoint) * net.branches[i].scale;
- det.bbox.h = (static_cast<float>(net.branches[i].modelOutput[bbox_h_offset])
- - net.branches[i].zeroPoint) * net.branches[i].scale;
-
- float bbox_x, bbox_y;
-
- /* Eliminate grid sensitivity trick involved in YOLOv4 */
- bbox_x = math::MathUtils::SigmoidF32(det.bbox.x);
- bbox_y = math::MathUtils::SigmoidF32(det.bbox.y);
- det.bbox.x = (bbox_x + w) / width;
- det.bbox.y = (bbox_y + h) / height;
-
- det.bbox.w = std::exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
- det.bbox.h = std::exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
-
- for (int s = 0; s < numClasses; s++) {
- float sig = math::MathUtils::SigmoidF32(
- (static_cast<float>(net.branches[i].modelOutput[bbox_scores_offset + s]) -
- net.branches[i].zeroPoint) * net.branches[i].scale
- ) * objectness;
- det.prob.emplace_back((sig > threshold) ? sig : 0);
- }
-
- /* Correct_YOLO_boxes */
- det.bbox.x *= imageWidth;
- det.bbox.w *= imageWidth;
- det.bbox.y *= imageHeight;
- det.bbox.h *= imageHeight;
-
- if (num < net.topN || net.topN <=0) {
- detections.emplace_front(det);
- num += 1;
- } else if (num == net.topN) {
- detections.sort(det_objectness_comparator);
- InsertTopNDetections(detections,det);
- num += 1;
- } else {
- InsertTopNDetections(detections,det);
- }
- }
- }
- }
- }
- }
- if(num > net.topN)
- num -=1;
-}
-
-} /* namespace app */
-} /* namespace arm */
diff --git a/source/use_case/object_detection/src/DetectorPreProcessing.cc b/source/use_case/object_detection/src/DetectorPreProcessing.cc
deleted file mode 100644
index 7212046..0000000
--- a/source/use_case/object_detection/src/DetectorPreProcessing.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "DetectorPreProcessing.hpp"
-#include "ImageUtils.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- DetectorPreProcess::DetectorPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray, bool convertToInt8)
- : m_inputTensor{inputTensor},
- m_rgb2Gray{rgb2Gray},
- m_convertToInt8{convertToInt8}
- {}
-
- bool DetectorPreProcess::DoPreProcess(const void* data, size_t inputSize) {
- if (data == nullptr) {
- printf_err("Data pointer is null");
- }
-
- auto input = static_cast<const uint8_t*>(data);
-
- if (this->m_rgb2Gray) {
- image::RgbToGrayscale(input, this->m_inputTensor->data.uint8, this->m_inputTensor->bytes);
- } else {
- std::memcpy(this->m_inputTensor->data.data, input, inputSize);
- }
- debug("Input tensor populated \n");
-
- if (this->m_convertToInt8) {
- image::ConvertImgToInt8(this->m_inputTensor->data.data, this->m_inputTensor->bytes);
- }
-
- return true;
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/object_detection/src/MainLoop.cc b/source/use_case/object_detection/src/MainLoop.cc
index 4291164..d119501 100644
--- a/source/use_case/object_detection/src/MainLoop.cc
+++ b/source/use_case/object_detection/src/MainLoop.cc
@@ -19,7 +19,17 @@
#include "YoloFastestModel.hpp" /* Model class for running inference. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+ namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ } /* namespace app */
+} /* namespace arm */
+
+extern uint8_t* GetModelPointer();
+extern size_t GetModelLen();
static void DisplayDetectionMenu()
{
@@ -40,11 +50,22 @@ void main_loop()
arm::app::YoloFastestModel model; /* Model wrapper object. */
/* Load the model. */
- if (!model.Init()) {
+ if (!model.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ GetModelPointer(),
+ GetModelLen())) {
printf_err("Failed to initialise model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
diff --git a/source/use_case/object_detection/src/YoloFastestModel.cc b/source/use_case/object_detection/src/YoloFastestModel.cc
deleted file mode 100644
index b1fd776..0000000
--- a/source/use_case/object_detection/src/YoloFastestModel.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "YoloFastestModel.hpp"
-
-#include "log_macros.h"
-
-const tflite::MicroOpResolver& arm::app::YoloFastestModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::YoloFastestModel::EnlistOperations()
-{
- this->m_opResolver.AddDepthwiseConv2D();
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddAdd();
- this->m_opResolver.AddResizeNearestNeighbor();
- /*These are needed for UT to work, not needed on FVP */
- this->m_opResolver.AddPad();
- this->m_opResolver.AddMaxPool2D();
- this->m_opResolver.AddConcatenation();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::YoloFastestModel::ModelPointer()
-{
- return GetModelPointer();
-}
-
-extern size_t GetModelLen();
-size_t arm::app::YoloFastestModel::ModelSize()
-{
- return GetModelLen();
-}
diff --git a/source/use_case/object_detection/usecase.cmake b/source/use_case/object_detection/usecase.cmake
index 42c4f2c..850e7fc 100644
--- a/source/use_case/object_detection/usecase.cmake
+++ b/source/use_case/object_detection/usecase.cmake
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "object_detection")
USER_OPTION(${use_case}_FILE_PATH "Directory with custom image files to use, or path to a single image, in the evaluation application"
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
diff --git a/source/use_case/vww/include/VisualWakeWordModel.hpp b/source/use_case/vww/include/VisualWakeWordModel.hpp
deleted file mode 100644
index 1ed9202..0000000
--- a/source/use_case/vww/include/VisualWakeWordModel.hpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2021 - 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef VISUAL_WAKE_WORD_MODEL_HPP
-#define VISUAL_WAKE_WORD_MODEL_HPP
-
-#include "Model.hpp"
-
-namespace arm {
-namespace app {
-
- class VisualWakeWordModel : public Model {
-
- public:
- /* Indices for the expected model - based on input tensor shape */
- static constexpr uint32_t ms_inputRowsIdx = 1;
- static constexpr uint32_t ms_inputColsIdx = 2;
- static constexpr uint32_t ms_inputChannelsIdx = 3;
-
- protected:
- /** @brief Gets the reference to op resolver interface class. */
- const tflite::MicroOpResolver& GetOpResolver() override;
-
- /** @brief Adds operations to the op resolver instance. */
- bool EnlistOperations() override;
-
- const uint8_t* ModelPointer() override;
-
- size_t ModelSize() override;
- private:
- /* Maximum number of individual operations that can be enlisted. */
- static constexpr int ms_maxOpCnt = 7;
-
- /* A mutable op resolver instance. */
- tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* VISUAL_WAKE_WORD_MODEL_HPP */
diff --git a/source/use_case/vww/include/VisualWakeWordProcessing.hpp b/source/use_case/vww/include/VisualWakeWordProcessing.hpp
deleted file mode 100644
index f9f9d72..0000000
--- a/source/use_case/vww/include/VisualWakeWordProcessing.hpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef VWW_PROCESSING_HPP
-#define VWW_PROCESSING_HPP
-
-#include "BaseProcessing.hpp"
-#include "Model.hpp"
-#include "Classifier.hpp"
-
-namespace arm {
-namespace app {
-
- /**
- * @brief Pre-processing class for Visual Wake Word use case.
- * Implements methods declared by BasePreProcess and anything else needed
- * to populate input tensors ready for inference.
- */
- class VisualWakeWordPreProcess : public BasePreProcess {
-
- public:
- /**
- * @brief Constructor
- * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
- * @param[in] rgb2Gray Convert image from 3 channel RGB to 1 channel grayscale.
- **/
- explicit VisualWakeWordPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray=true);
-
- /**
- * @brief Should perform pre-processing of 'raw' input image data and load it into
- * TFLite Micro input tensors ready for inference
- * @param[in] input Pointer to the data that pre-processing will work on.
- * @param[in] inputSize Size of the input data.
- * @return true if successful, false otherwise.
- **/
- bool DoPreProcess(const void* input, size_t inputSize) override;
-
- private:
- TfLiteTensor* m_inputTensor;
- bool m_rgb2Gray;
- };
-
- /**
- * @brief Post-processing class for Visual Wake Word use case.
- * Implements methods declared by BasePostProcess and anything else needed
- * to populate result vector.
- */
- class VisualWakeWordPostProcess : public BasePostProcess {
-
- private:
- TfLiteTensor* m_outputTensor;
- Classifier& m_vwwClassifier;
- const std::vector<std::string>& m_labels;
- std::vector<ClassificationResult>& m_results;
-
- public:
- /**
- * @brief Constructor
- * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
- * @param[in] classifier Classifier object used to get top N results from classification.
- * @param[in] model Pointer to the VWW classification Model object.
- * @param[in] labels Vector of string labels to identify each output of the model.
- * @param[out] results Vector of classification results to store decoded outputs.
- **/
- VisualWakeWordPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels,
- std::vector<ClassificationResult>& results);
-
- /**
- * @brief Should perform post-processing of the result of inference then
- * populate classification result data for any later use.
- * @return true if successful, false otherwise.
- **/
- bool DoPostProcess() override;
- };
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* VWW_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/use_case/vww/src/MainLoop.cc b/source/use_case/vww/src/MainLoop.cc
index 041ea18..2161b0a 100644
--- a/source/use_case/vww/src/MainLoop.cc
+++ b/source/use_case/vww/src/MainLoop.cc
@@ -21,7 +21,17 @@
#include "VisualWakeWordModel.hpp" /* Model class for running inference. */
#include "UseCaseHandler.hpp" /* Handlers for different user options. */
#include "UseCaseCommonUtils.hpp" /* Utils functions. */
-#include "log_macros.h"
+#include "log_macros.h" /* Logging functions */
+#include "BufAttributes.hpp" /* Buffer attributes to be applied */
+
+namespace arm {
+ namespace app {
+ static uint8_t tensorArena[ACTIVATION_BUF_SZ] ACTIVATION_BUF_ATTRIBUTE;
+ } /* namespace app */
+} /* namespace arm */
+
+extern uint8_t* GetModelPointer();
+extern size_t GetModelLen();
using ViusalWakeWordClassifier = arm::app::Classifier;
@@ -30,11 +40,22 @@ void main_loop()
arm::app::VisualWakeWordModel model; /* Model wrapper object. */
/* Load the model. */
- if (!model.Init()) {
+ if (!model.Init(arm::app::tensorArena,
+ sizeof(arm::app::tensorArena),
+ GetModelPointer(),
+ GetModelLen())) {
printf_err("Failed to initialise model\n");
return;
}
+#if !defined(ARM_NPU)
+ /* If it is not a NPU build check if the model contains a NPU operator */
+ if (model.ContainsEthosUOperator()) {
+ printf_err("No driver support for Ethos-U operator found in the model.\n");
+ return;
+ }
+#endif /* ARM_NPU */
+
/* Instantiate application context. */
arm::app::ApplicationContext caseContext;
@@ -55,7 +76,7 @@ void main_loop()
constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
do {
int menuOption = common::MENU_OPT_RUN_INF_NEXT;
- if (bUseMenu) {
+ if (bUseMenu) {
DisplayCommonMenu();
menuOption = arm::app::ReadUserInputAsInt();
printf("\n");
diff --git a/source/use_case/vww/src/VisualWakeWordModel.cc b/source/use_case/vww/src/VisualWakeWordModel.cc
deleted file mode 100644
index 59beccc..0000000
--- a/source/use_case/vww/src/VisualWakeWordModel.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "VisualWakeWordModel.hpp"
-#include "log_macros.h"
-
-const tflite::MicroOpResolver& arm::app::VisualWakeWordModel::GetOpResolver()
-{
- return this->m_opResolver;
-}
-
-bool arm::app::VisualWakeWordModel::EnlistOperations()
-{
- this->m_opResolver.AddDepthwiseConv2D();
- this->m_opResolver.AddConv2D();
- this->m_opResolver.AddAveragePool2D();
- this->m_opResolver.AddReshape();
- this->m_opResolver.AddPad();
- this->m_opResolver.AddAdd();
-
-#if defined(ARM_NPU)
- if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
- info("Added %s support to op resolver\n",
- tflite::GetString_ETHOSU());
- } else {
- printf_err("Failed to add Arm NPU support to op resolver.");
- return false;
- }
-#endif /* ARM_NPU */
- return true;
-}
-
-extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::VisualWakeWordModel::ModelPointer()
-{
- return GetModelPointer();
-}
-
-extern size_t GetModelLen();
-size_t arm::app::VisualWakeWordModel::ModelSize()
-{
- return GetModelLen();
-} \ No newline at end of file
diff --git a/source/use_case/vww/src/VisualWakeWordProcessing.cc b/source/use_case/vww/src/VisualWakeWordProcessing.cc
deleted file mode 100644
index 4ae8a54..0000000
--- a/source/use_case/vww/src/VisualWakeWordProcessing.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "VisualWakeWordProcessing.hpp"
-
-#include "ImageUtils.hpp"
-#include "VisualWakeWordModel.hpp"
-#include "log_macros.h"
-
-namespace arm {
-namespace app {
-
- VisualWakeWordPreProcess::VisualWakeWordPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray)
- :m_inputTensor{inputTensor},
- m_rgb2Gray{rgb2Gray}
- {}
-
- bool VisualWakeWordPreProcess::DoPreProcess(const void* data, size_t inputSize)
- {
- if (data == nullptr) {
- printf_err("Data pointer is null");
- }
-
- auto input = static_cast<const uint8_t*>(data);
-
- uint8_t* unsignedDstPtr = this->m_inputTensor->data.uint8;
-
- if (this->m_rgb2Gray) {
- image::RgbToGrayscale(input, unsignedDstPtr, inputSize);
- } else {
- std::memcpy(unsignedDstPtr, input, inputSize);
- }
-
- /* VWW model pre-processing is image conversion from uint8 to [0,1] float values,
- * then quantize them with input quantization info. */
- QuantParams inQuantParams = GetTensorQuantParams(this->m_inputTensor);
-
- int8_t* signedDstPtr = this->m_inputTensor->data.int8;
- for (size_t i = 0; i < this->m_inputTensor->bytes; i++) {
- auto i_data_int8 = static_cast<int8_t>(
- ((static_cast<float>(unsignedDstPtr[i]) / 255.0f) / inQuantParams.scale) + inQuantParams.offset
- );
- signedDstPtr[i] = std::min<int8_t>(INT8_MAX, std::max<int8_t>(i_data_int8, INT8_MIN));
- }
-
- debug("Input tensor populated \n");
-
- return true;
- }
-
- VisualWakeWordPostProcess::VisualWakeWordPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
- const std::vector<std::string>& labels, std::vector<ClassificationResult>& results)
- :m_outputTensor{outputTensor},
- m_vwwClassifier{classifier},
- m_labels{labels},
- m_results{results}
- {}
-
- bool VisualWakeWordPostProcess::DoPostProcess()
- {
- return this->m_vwwClassifier.GetClassificationResults(
- this->m_outputTensor, this->m_results,
- this->m_labels, 1, true);
- }
-
-} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
diff --git a/source/use_case/vww/usecase.cmake b/source/use_case/vww/usecase.cmake
index 8bf55fc..f6a3efe 100644
--- a/source/use_case/vww/usecase.cmake
+++ b/source/use_case/vww/usecase.cmake
@@ -1,3 +1,4 @@
+#----------------------------------------------------------------------------
# Copyright (c) 2021 Arm Limited. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
@@ -12,7 +13,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+#----------------------------------------------------------------------------
+# Append the API to use for this use case
+list(APPEND ${use_case}_API_LIST "vww")
USER_OPTION(${use_case}_FILE_PATH "Directory with custom image files, or path to a single image file, to use in the evaluation application"
${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/